From 66e18c93e1ceda6f307333ab0522a1807c15a35a Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Wed, 8 Apr 2020 18:05:35 +0200 Subject: [PATCH] 1. add function to get median node/edge label in MedianGraphEstimator. 2. update load_tud function. 3. update MedianPreimageGenerator. --- gklearn/ged/median/median_graph_estimator.py | 147 +++++++++++++++++++++++++- gklearn/preimage/median_preimage_generator.py | 63 ++++++----- gklearn/preimage/utils.py | 10 +- gklearn/utils/dataset.py | 6 +- gklearn/utils/graph_files.py | 20 ++-- 5 files changed, 201 insertions(+), 45 deletions(-) diff --git a/gklearn/ged/median/median_graph_estimator.py b/gklearn/ged/median/median_graph_estimator.py index 84cd64d..0b0cfe2 100644 --- a/gklearn/ged/median/median_graph_estimator.py +++ b/gklearn/ged/median/median_graph_estimator.py @@ -666,7 +666,8 @@ class MedianGraphEstimator(object): # Compute the median label and update the median. if len(node_labels) > 0: - median_label = self.__ged_env.get_median_node_label(node_labels) +# median_label = self.__ged_env.get_median_node_label(node_labels) + median_label = self.__get_median_node_label(node_labels) if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon: nx.set_node_attributes(median, {i: median_label}) @@ -701,7 +702,7 @@ class MedianGraphEstimator(object): if median.has_edge(i, j): median_label = median.edges[(i, j)] if self.__labeled_edges and len(edge_labels) > 0: - new_median_label = self.__ged_env.median_edge_label(edge_labels) + new_median_label = self.__get_median_edge_label(edge_labels) if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon: median_label = new_median_label for edge_label in edge_labels: @@ -821,4 +822,144 @@ class MedianGraphEstimator(object): def compute_my_cost(g, h, node_map): cost = 0.0 for node in g.nodes: - cost += 0 \ No newline at end of file + cost += 0 + + + def __get_median_node_label(self, node_labels): + if True: + return self.__get_median_label_nonsymbolic(node_labels) + else: + return self.__get_median_node_label_symbolic(node_labels) + + + def __get_median_edge_label(self, edge_labels): + if True: + return self.__get_median_label_nonsymbolic(edge_labels) + else: + return self.__get_median_edge_label_symbolic(edge_labels) + + + def __get_median_label_nonsymbolic(self, labels): + if len(labels) == 0: + return {} # @todo + else: + # Transform the labels into coordinates and compute mean label as initial solution. + labels_as_coords = [] + sums = {} + for key, val in labels[0].items(): + sums[key] = 0 + for label in labels: + coords = {} + for key, val in label.items(): + label = float(val) + sums[key] += label + coords[key] = label + labels_as_coords.append(coords) + median = {} + for key, val in sums.items(): + median[key] = val / len(labels) + + # Run main loop of Weiszfeld's Algorithm. 
+ epsilon = 0.0001
+ delta = 1.0
+ num_itrs = 0
+ all_equal = False
+ while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)):
+ numerator = {}
+ for key, val in sums.items():
+ numerator[key] = 0
+ denominator = 0
+ for label_as_coord in labels_as_coords:
+ norm = 0
+ for key, val in label_as_coord.items():
+ norm += (val - median[key]) ** 2
+ norm = np.sqrt(norm)
+ if norm > 0:
+ for key, val in label_as_coord.items():
+ numerator[key] += val / norm
+ denominator += 1.0 / norm
+ if denominator == 0:
+ all_equal = True
+ else:
+ new_median = {}
+ delta = 0.0
+ for key, val in numerator.items():
+ this_median = val / denominator
+ new_median[key] = this_median
+ delta += np.abs(median[key] - this_median)
+ median = new_median
+
+ num_itrs += 1
+
+ # Transform the solution to strings and return it.
+ median_label = {}
+ for key, val in median.items():
+ median_label[key] = str(val)
+ return median_label
+
+
+ def __get_median_node_label_symbolic(self, node_labels):
+ pass
+
+
+ def __get_median_edge_label_symbolic(self, edge_labels):
+ pass
+
+
+# def __get_median_edge_label_nonsymbolic(self, edge_labels):
+# if len(edge_labels) == 0:
+# return {}
+# else:
+# # Transform the labels into coordinates and compute mean label as initial solution.
+# edge_labels_as_coords = []
+# sums = {}
+# for key, val in edge_labels[0].items():
+# sums[key] = 0
+# for edge_label in edge_labels:
+# coords = {}
+# for key, val in edge_label.items():
+# label = float(val)
+# sums[key] += label
+# coords[key] = label
+# edge_labels_as_coords.append(coords)
+# median = {}
+# for key, val in sums.items():
+# median[key] = val / len(edge_labels)
+#
+# # Run main loop of Weiszfeld's Algorithm.
+# epsilon = 0.0001
+# delta = 1.0
+# num_itrs = 0
+# all_equal = False
+# while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)):
+# numerator = {}
+# for key, val in sums.items():
+# numerator[key] = 0
+# denominator = 0
+# for edge_label_as_coord in edge_labels_as_coords:
+# norm = 0
+# for key, val in edge_label_as_coord.items():
+# norm += (val - median[key]) ** 2
+# norm = np.sqrt(norm)
+# if norm > 0:
+# for key, val in edge_label_as_coord.items():
+# numerator[key] += val / norm
+# denominator += 1.0 / norm
+# if denominator == 0:
+# all_equal = True
+# else:
+# new_median = {}
+# delta = 0.0
+# for key, val in numerator.items():
+# this_median = val / denominator
+# new_median[key] = this_median
+# delta += np.abs(median[key] - this_median)
+# median = new_median
+#
+# num_itrs += 1
+#
+# # Transform the solution to ged::GXLLabel and return it.
+# median_label = {}
+# for key, val in median.items():
+# median_label[key] = str(val)
+# return median_label
\ No newline at end of file
diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py
index ef1d57a..98eaa81 100644
--- a/gklearn/preimage/median_preimage_generator.py
+++ b/gklearn/preimage/median_preimage_generator.py
@@ -96,7 +96,10 @@ class MedianPreimageGenerator(PreimageGenerator):
 if self.__runtime_precompute_gm is None:
 raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
 self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
- self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
+ if self._kernel_options['normalize']:
+ self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
+ else:
+ self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
 end_precompute_gm = time.time()
 start -= self.__runtime_precompute_gm
@@ -447,31 +450,7 @@ class MedianPreimageGenerator(PreimageGenerator):
 constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
 np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
 prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- try:
- prob.solve(verbose=True)
- except MemoryError as error0:
- if self._verbose >= 2:
- print('\nUsing solver "OSQP" caused a memory error.')
- print('the original error message is\n', error0)
- print('solver status: ', prob.status)
- print('trying solver "CVXOPT" instead...\n')
- try:
- prob.solve(solver=cp.CVXOPT, verbose=True)
- except Exception as error1:
- if self._verbose >= 2:
- print('\nAn error occured when using solver "CVXOPT".')
- print('the original error message is\n', error1)
- print('solver status: ', prob.status)
- print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
- prob.solve(solver=cp.MOSEK, verbose=True)
- else:
- if self._verbose >= 2:
- print('solver status: ', prob.status)
- else:
- if self._verbose >= 2:
- print('solver status: ', prob.status)
- if self._verbose >= 2:
- print()
+ self.__execute_cvx(prob)
 edit_costs_new = x.value
 residual = np.sqrt(prob.value)
 elif rw_constraints == '2constraints':
@@ -551,9 +530,7 @@ class MedianPreimageGenerator(PreimageGenerator):
 constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
 np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
 prob = cp.Problem(cp.Minimize(cost_fun), constraints)
- prob.solve()
- if self._verbose >= 2:
- print(x.value)
+ self.__execute_cvx(prob)
 edit_costs_new = np.concatenate((x.value, np.array([0.0])))
 residual = np.sqrt(prob.value)
 elif not is_n_attr and is_e_attr:
@@ -616,6 +593,34 @@ class MedianPreimageGenerator(PreimageGenerator):
 return edit_costs_new, residual
 
 
+ def __execute_cvx(self, prob):
+ try:
+ prob.solve(verbose=(self._verbose>=2))
+ except MemoryError as error0:
+ if self._verbose >= 2:
+ print('\nUsing solver "OSQP" caused a memory error.')
+ print('the original error message is\n', error0)
+ print('solver status: ', prob.status)
+ print('trying solver "CVXOPT" instead...\n')
+ try:
+ prob.solve(solver=cp.CVXOPT, verbose=(self._verbose>=2))
+ except Exception as error1:
+ if self._verbose >= 2:
+ print('\nAn error occurred when using solver "CVXOPT".')
+ print('the original error message is\n', error1)
+ print('solver status: ', prob.status)
+ print('trying solver "MOSEK" instead. Notice this solver is commercial and a license is required.\n')
+ prob.solve(solver=cp.MOSEK, verbose=(self._verbose>=2))
+ else:
+ if self._verbose >= 2:
+ print('solver status: ', prob.status)
+ else:
+ if self._verbose >= 2:
+ print('solver status: ', prob.status)
+ if self._verbose >= 2:
+ print()
+
+
 def __generate_preimage_iam(self):
 # Set up the ged environment.
 ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private varible.
diff --git a/gklearn/preimage/utils.py b/gklearn/preimage/utils.py
index a3a661e..0cd50ef 100644
--- a/gklearn/preimage/utils.py
+++ b/gklearn/preimage/utils.py
@@ -67,8 +67,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
 gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
 gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
 if gmfile_exist:
- gmfile = np.load(gm_fname)
- gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list']
+ gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
+ gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']]
 time_precompute_gm_list = gmfile['run_time_list'].tolist()
 else:
 gram_matrix_unnorm_list = []
@@ -87,6 +87,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
 print('start generating preimage for each class of target...')
+ idx_offset = 0
 for idx, dataset in enumerate(datasets):
 target = dataset.targets[0]
 print('\ntarget =', target, '\n')
@@ -96,14 +97,15 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
 num_graphs = len(dataset.graphs)
 if num_graphs < 2:
 print('\nnumber of graphs = ', num_graphs, ', skip.\n')
+ idx_offset += 1
 continue
 # 2. set parameters.
 print('2. initializing mpg and setting parameters...')
 if load_gm:
 if gmfile_exist:
- mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx]
- mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx]
+ mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx - idx_offset]
+ mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx - idx_offset]
 mpg = MedianPreimageGenerator()
 mpg.dataset = dataset
 mpg.set_options(**mpg_options.copy())
diff --git a/gklearn/utils/dataset.py b/gklearn/utils/dataset.py
index 6f5389c..6d5250d 100644
--- a/gklearn/utils/dataset.py
+++ b/gklearn/utils/dataset.py
@@ -92,9 +92,11 @@ class Dataset(object):
 elif ds_name == 'COIL-RAG':
 pass
 elif ds_name == 'COLORS-3':
- pass
+ ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt'
+ self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 elif ds_name == 'FRANKENSTEIN':
- pass
+ ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt'
+ self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 self.__node_labels = label_names['node_labels']
 self.__node_attrs = label_names['node_attrs']
diff --git a/gklearn/utils/graph_files.py b/gklearn/utils/graph_files.py
index c00149e..a713958 100644
--- a/gklearn/utils/graph_files.py
+++ b/gklearn/utils/graph_files.py
@@ -541,10 +541,21 @@ def load_tud(filename):
 content_gi = open(fgi).read().splitlines() # graph indicator
 content_am = open(fam).read().splitlines() # adjacency matrix
- content_gl = open(fgl).read().splitlines() # graph labels
+
+ # load targets.
+ if 'fgl' in locals():
+ content_targets = open(fgl).read().splitlines() # targets (classification)
+ targets = [int(i) for i in content_targets]
+ elif 'fga' in locals():
+ content_targets = open(fga).read().splitlines() # targets (regression)
+ targets = [float(i) for i in content_targets]
+ if 'class_label_map' in locals():
+ targets = [class_label_map[t] for t in targets]
+ else:
+ raise Exception('Cannot find targets file. Please make sure there is a "' + ds_name + '_graph_labels.txt" or "' + ds_name + '_graph_attributes.txt" file in your dataset folder.')
 # create graphs and add nodes
- data = [nx.Graph(name=str(i)) for i in range(0, len(content_gl))]
+ data = [nx.Graph(name=str(i)) for i in range(0, len(content_targets))]
 if 'fnl' in locals():
 content_nl = open(fnl).read().splitlines() # node labels
 for idx, line in enumerate(content_gi):
@@ -619,11 +630,6 @@ def load_tud(filename):
 for i, a_name in enumerate(label_names['edge_attrs']):
 data[g].edges[n[0], n[1]][a_name] = attrs[i]
- # load targets.
- targets = [int(i) for i in content_gl]
- if 'class_label_map' in locals():
- targets = [class_label_map[t] for t in targets]
-
 return data, targets, label_names
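
Note on the new median-label computation: __get_median_label_nonsymbolic iterates Weiszfeld's algorithm to find the geometric median of a set of numeric label dictionaries, starting from their arithmetic mean. The following is a minimal standalone sketch of the same update rule; the name weiszfeld_median and the example labels are illustrative and not part of the patch or of gklearn.

    import numpy as np

    def weiszfeld_median(labels, epsilon=0.0001, max_itrs=100):
        """Coordinate-wise geometric median of label dicts whose values are numeric strings."""
        if len(labels) == 0:
            return {}
        # Convert labels to float coordinates and start from the arithmetic mean.
        coords = [{key: float(val) for key, val in label.items()} for label in labels]
        median = {key: sum(c[key] for c in coords) / len(coords) for key in coords[0]}
        delta, num_itrs = 1.0, 0
        while delta > epsilon and num_itrs < max_itrs:
            numerator = {key: 0.0 for key in median}
            denominator = 0.0
            for c in coords:
                # Euclidean distance between this label and the current median estimate.
                norm = np.sqrt(sum((c[key] - median[key]) ** 2 for key in c))
                if norm > 0:
                    for key, val in c.items():
                        numerator[key] += val / norm
                    denominator += 1.0 / norm
            if denominator == 0:  # every label coincides with the current median
                break
            new_median = {key: val / denominator for key, val in numerator.items()}
            delta = sum(abs(median[key] - new_median[key]) for key in median)
            median = new_median
            num_itrs += 1
        # The estimator stores label values as strings, so convert back.
        return {key: str(val) for key, val in median.items()}

    # Example: median of three 2-D point labels.
    print(weiszfeld_median([{'x': '0', 'y': '0'}, {'x': '2', 'y': '0'}, {'x': '0', 'y': '2'}]))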
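The new __execute_cvx helper centralizes the solver-fallback logic that was previously duplicated inline in __optimize_edit_cost_constants: solve with the default solver (OSQP for these quadratic programs), fall back to CVXOPT on a memory error, and finally to MOSEK, which is commercial and needs a license. A rough standalone sketch of the same pattern; the function name solve_with_fallback and the toy problem are illustrative, not part of the patch.

    import cvxpy as cp
    import numpy as np

    def solve_with_fallback(prob, verbose=False):
        # Try the default solver first, then CVXOPT, then MOSEK as a last resort.
        try:
            prob.solve(verbose=verbose)
        except MemoryError:
            try:
                prob.solve(solver=cp.CVXOPT, verbose=verbose)
            except Exception:
                prob.solve(solver=cp.MOSEK, verbose=verbose)
        return prob.status

    # Illustrative non-negative least-squares problem.
    A = np.array([[1.0, 2.0], [3.0, 4.0]])
    b = np.array([1.0, 2.0])
    x = cp.Variable(2)
    prob = cp.Problem(cp.Minimize(cp.sum_squares(A @ x - b)), [x >= 0])
    print(solve_with_fallback(prob), x.value)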
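Finally, load_tud now derives the targets from whichever file is present: the graph-labels file for classification (integer labels, optionally remapped through class_label_map) or the graph-attributes file for regression (float targets), and raises an error if neither exists. A simplified sketch of that selection logic; the helper name read_tud_targets is hypothetical and not part of gklearn.

    import os

    def read_tud_targets(ds_dir, ds_name):
        # Prefer classification labels; fall back to regression attributes.
        fgl = os.path.join(ds_dir, ds_name + '_graph_labels.txt')
        fga = os.path.join(ds_dir, ds_name + '_graph_attributes.txt')
        if os.path.isfile(fgl):
            return [int(line) for line in open(fgl).read().splitlines()]
        if os.path.isfile(fga):
            return [float(line) for line in open(fga).read().splitlines()]
        raise Exception('Cannot find targets file "' + ds_name + '_graph_labels.txt" or "'
                        + ds_name + '_graph_attributes.txt" in ' + ds_dir + '.')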