diff --git a/gklearn/ged/util/util.py b/gklearn/ged/util/util.py index 2ff0103..22601dd 100644 --- a/gklearn/ged/util/util.py +++ b/gklearn/ged/util/util.py @@ -57,7 +57,9 @@ def compute_geds(graphs, options={}, parallel=False): ged_env.set_method(options['method'], ged_options_to_string(options)) ged_env.init_method() - # compute ged. + # compute ged. + neo_options = {'edit_cost': options['edit_cost'], + 'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']} ged_mat = np.zeros((len(graphs), len(graphs))) if parallel: len_itr = int(len(graphs) * (len(graphs) - 1) / 2) @@ -74,7 +76,7 @@ def compute_geds(graphs, options={}, parallel=False): G_graphs = graphs_toshare G_ged_env = ged_env_toshare G_listID = listID_toshare - do_partial = partial(_wrapper_compute_ged_parallel, options) + do_partial = partial(_wrapper_compute_ged_parallel, neo_options) pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID)) iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize), desc='computing GEDs', file=sys.stdout) @@ -100,7 +102,7 @@ def compute_geds(graphs, options={}, parallel=False): ged_vec.append(dis) ged_mat[i][j] = dis ged_mat[j][i] = dis - n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, edit_cost=options['edit_cost']) + n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options) n_edit_operations.append(n_eo_tmp) return ged_vec, ged_mat, n_edit_operations @@ -115,7 +117,7 @@ def _wrapper_compute_ged_parallel(options, itr): def _compute_ged_parallel(env, gid1, gid2, g1, g2, options): dis, pi_forward, pi_backward = _compute_ged(env, gid1, gid2, g1, g2) - n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, edit_cost=options['edit_cost']) # [0,0,0,0,0,0] + n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, **options) # [0,0,0,0,0,0] return dis, n_eo_tmp @@ -137,11 +139,14 @@ def _compute_ged(env, gid1, gid2, g1, g2): return dis, pi_forward, pi_backward -def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None): +def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, **kwargs): if edit_cost == 'LETTER' or edit_cost == 'LETTER2': return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map) elif edit_cost == 'NON_SYMBOLIC': - return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map) + node_attrs = kwargs.get('node_attrs', []) + edge_attrs = kwargs.get('edge_attrs', []) + return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map, + node_attrs=node_attrs, edge_attrs=edge_attrs) else: return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map) @@ -242,7 +247,8 @@ def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map): return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er -def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map): +def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map, + node_attrs=[], edge_attrs=[]): """Compute the number of each edit operations. """ n_vi = 0 @@ -261,7 +267,7 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map): else: n_vs += 1 sum_squares = 0 - for a_name in g1.graph['node_attrs']: + for a_name in node_attrs: diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name]) sum_squares += np.square(diff) sod_vs += np.sqrt(sum_squares) @@ -284,15 +290,15 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map): elif (n1_g2, n2_g2) in g2.edges(): n_es += 1 sum_squares = 0 - for a_name in g1.graph['edge_attrs']: - diff = float(g1.edges[n1, n2][a_name]) - float(g2.nodes[n1_g2, n2_g2][a_name]) + for a_name in edge_attrs: + diff = float(g1.edges[n1, n2][a_name]) - float(g2.edges[n1_g2, n2_g2][a_name]) sum_squares += np.square(diff) sod_es += np.sqrt(sum_squares) elif (n2_g2, n1_g2) in g2.edges(): n_es += 1 sum_squares = 0 - for a_name in g1.graph['edge_attrs']: - diff = float(g1.edges[n2, n1][a_name]) - float(g2.nodes[n2_g2, n1_g2][a_name]) + for a_name in edge_attrs: + diff = float(g1.edges[n2, n1][a_name]) - float(g2.edges[n2_g2, n1_g2][a_name]) sum_squares += np.square(diff) sod_es += np.sqrt(sum_squares) # corresponding nodes are in g2, however the edge is removed. diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py index 98eaa81..916cf8a 100644 --- a/gklearn/preimage/median_preimage_generator.py +++ b/gklearn/preimage/median_preimage_generator.py @@ -262,6 +262,8 @@ class MedianPreimageGenerator(PreimageGenerator): self.__edit_cost_constants = self.__init_ecc options = self.__ged_options.copy() options['edit_cost_constants'] = self.__edit_cost_constants # @todo + options['node_attrs'] = self._dataset.node_attrs + options['edge_attrs'] = self._dataset.edge_attrs ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel) residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] time_list = [time.time() - time0] @@ -300,6 +302,8 @@ class MedianPreimageGenerator(PreimageGenerator): # compute new GEDs and numbers of edit operations. options = self.__ged_options.copy() # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75]) options['edit_cost_constants'] = self.__edit_cost_constants # @todo + options['node_attrs'] = self._dataset.node_attrs + options['edge_attrs'] = self._dataset.edge_attrs ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel) residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec)))) time_list.append(time.time() - time0) diff --git a/gklearn/utils/dataset.py b/gklearn/utils/dataset.py index 6d5250d..c90073f 100644 --- a/gklearn/utils/dataset.py +++ b/gklearn/utils/dataset.py @@ -90,7 +90,8 @@ class Dataset(object): ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt' self.__graphs, self.__targets, label_names = load_dataset(ds_file) elif ds_name == 'COIL-RAG': - pass + ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt' + self.__graphs, self.__targets, label_names = load_dataset(ds_file) elif ds_name == 'COLORS-3': ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt' self.__graphs, self.__targets, label_names = load_dataset(ds_file) diff --git a/gklearn/utils/graph_files.py b/gklearn/utils/graph_files.py index a713958..7f424d6 100644 --- a/gklearn/utils/graph_files.py +++ b/gklearn/utils/graph_files.py @@ -474,6 +474,7 @@ def load_tud(filename): label_names = {'node_labels': [], 'node_attrs': [], 'edge_labels': [], 'edge_attrs': []} + class_label_map = None class_label_map_strings = [] content_rm = open(frm).read().splitlines() i = 0 @@ -538,6 +539,7 @@ def load_tud(filename): else: label_names = {'node_labels': [], 'node_attrs': [], 'edge_labels': [], 'edge_attrs': []} + class_label_map = None content_gi = open(fgi).read().splitlines() # graph indicator content_am = open(fam).read().splitlines() # adjacency matrix @@ -549,7 +551,7 @@ def load_tud(filename): elif 'fga' in locals(): content_targets = open(fga).read().splitlines() # targets (regression) targets = [int(i) for i in content_targets] - if 'class_label_map' in locals(): + if class_label_map is not None: targets = [class_label_map[t] for t in targets] else: raise Exception('Can not find targets file. Please make sure there is a "', ds_name, '_graph_labels.txt" or "', ds_name, '_graph_attributes.txt"', 'file in your dataset folder.') @@ -562,7 +564,7 @@ def load_tud(filename): # transfer to int first in case of unexpected blanks data[int(line) - 1].add_node(idx) labels = [l.strip() for l in content_nl[idx].split(',')] - if label_names['node_labels'] == []: + if label_names['node_labels'] == []: # @todo: need fix bug. for i, label in enumerate(labels): l_name = 'label_' + str(i) data[int(line) - 1].nodes[idx][l_name] = label