Browse Source

1. Fix bugs in the function that computes the numbers of edit operations for non-symbolic labels.

2. Fix bugs in load_tud.
v0.2.x
jajupmochi 5 years ago
parent
commit
45052568eb
4 changed files with 28 additions and 15 deletions
  1. +18
    -12
      gklearn/ged/util/util.py
  2. +4
    -0
      gklearn/preimage/median_preimage_generator.py
  3. +2
    -1
      gklearn/utils/dataset.py
  4. +4
    -2
      gklearn/utils/graph_files.py

+ 18
- 12
gklearn/ged/util/util.py View File

@@ -57,7 +57,9 @@ def compute_geds(graphs, options={}, parallel=False):
ged_env.set_method(options['method'], ged_options_to_string(options))
ged_env.init_method()

# compute ged.
# compute ged.
neo_options = {'edit_cost': options['edit_cost'],
'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
ged_mat = np.zeros((len(graphs), len(graphs)))
if parallel:
len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
@@ -74,7 +76,7 @@ def compute_geds(graphs, options={}, parallel=False):
G_graphs = graphs_toshare
G_ged_env = ged_env_toshare
G_listID = listID_toshare
do_partial = partial(_wrapper_compute_ged_parallel, options)
do_partial = partial(_wrapper_compute_ged_parallel, neo_options)
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
desc='computing GEDs', file=sys.stdout)
@@ -100,7 +102,7 @@ def compute_geds(graphs, options={}, parallel=False):
ged_vec.append(dis)
ged_mat[i][j] = dis
ged_mat[j][i] = dis
n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, edit_cost=options['edit_cost'])
n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
n_edit_operations.append(n_eo_tmp)
return ged_vec, ged_mat, n_edit_operations
@@ -115,7 +117,7 @@ def _wrapper_compute_ged_parallel(options, itr):

def _compute_ged_parallel(env, gid1, gid2, g1, g2, options):
dis, pi_forward, pi_backward = _compute_ged(env, gid1, gid2, g1, g2)
n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, edit_cost=options['edit_cost']) # [0,0,0,0,0,0]
n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, **options) # [0,0,0,0,0,0]
return dis, n_eo_tmp


@@ -137,11 +139,14 @@ def _compute_ged(env, gid1, gid2, g1, g2):
return dis, pi_forward, pi_backward


def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, **kwargs):
    """Count the edit operations implied by a node mapping between two graphs.

    Dispatches to the cost-model-specific counter selected by *edit_cost*.

    Parameters
    ----------
    g1, g2 :
        The two graphs being compared (networkx-style graphs — TODO confirm).
    forward_map, backward_map :
        Node maps from the GED computation (g1 -> g2 and g2 -> g1).
    edit_cost : str, optional
        Name of the edit cost model: 'LETTER', 'LETTER2', 'NON_SYMBOLIC',
        or anything else (treated as symbolic).
    **kwargs :
        Extra options. For 'NON_SYMBOLIC', 'node_attrs' and 'edge_attrs'
        list the numeric attribute names to compare; passing them here
        (instead of reading g1.graph['node_attrs']) fixes the bug where
        those graph-level keys may be unset.

    Returns
    -------
    tuple
        Edit-operation counts; the exact layout depends on the cost model.
    """
    if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
        return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)
    elif edit_cost == 'NON_SYMBOLIC':
        # Attribute name lists default to empty so the substitution-cost
        # sums are simply 0 when no numeric attributes are declared.
        node_attrs = kwargs.get('node_attrs', [])
        edge_attrs = kwargs.get('edge_attrs', [])
        return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
                                                  node_attrs=node_attrs,
                                                  edge_attrs=edge_attrs)
    else:
        return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
@@ -242,7 +247,8 @@ def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er


def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
node_attrs=[], edge_attrs=[]):
"""Compute the number of each edit operations.
"""
n_vi = 0
@@ -261,7 +267,7 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
else:
n_vs += 1
sum_squares = 0
for a_name in g1.graph['node_attrs']:
for a_name in node_attrs:
diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name])
sum_squares += np.square(diff)
sod_vs += np.sqrt(sum_squares)
@@ -284,15 +290,15 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
elif (n1_g2, n2_g2) in g2.edges():
n_es += 1
sum_squares = 0
for a_name in g1.graph['edge_attrs']:
diff = float(g1.edges[n1, n2][a_name]) - float(g2.nodes[n1_g2, n2_g2][a_name])
for a_name in edge_attrs:
diff = float(g1.edges[n1, n2][a_name]) - float(g2.edges[n1_g2, n2_g2][a_name])
sum_squares += np.square(diff)
sod_es += np.sqrt(sum_squares)
elif (n2_g2, n1_g2) in g2.edges():
n_es += 1
sum_squares = 0
for a_name in g1.graph['edge_attrs']:
diff = float(g1.edges[n2, n1][a_name]) - float(g2.nodes[n2_g2, n1_g2][a_name])
for a_name in edge_attrs:
diff = float(g1.edges[n2, n1][a_name]) - float(g2.edges[n2_g2, n1_g2][a_name])
sum_squares += np.square(diff)
sod_es += np.sqrt(sum_squares)
# corresponding nodes are in g2, however the edge is removed.


+ 4
- 0
gklearn/preimage/median_preimage_generator.py View File

@@ -262,6 +262,8 @@ class MedianPreimageGenerator(PreimageGenerator):
self.__edit_cost_constants = self.__init_ecc
options = self.__ged_options.copy()
options['edit_cost_constants'] = self.__edit_cost_constants # @todo
options['node_attrs'] = self._dataset.node_attrs
options['edge_attrs'] = self._dataset.edge_attrs
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel)
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
time_list = [time.time() - time0]
@@ -300,6 +302,8 @@ class MedianPreimageGenerator(PreimageGenerator):
# compute new GEDs and numbers of edit operations.
options = self.__ged_options.copy() # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75])
options['edit_cost_constants'] = self.__edit_cost_constants # @todo
options['node_attrs'] = self._dataset.node_attrs
options['edge_attrs'] = self._dataset.edge_attrs
ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel)
residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
time_list.append(time.time() - time0)


+ 2
- 1
gklearn/utils/dataset.py View File

@@ -90,7 +90,8 @@ class Dataset(object):
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'COIL-RAG':
pass
ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'COLORS-3':
ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)


+ 4
- 2
gklearn/utils/graph_files.py View File

@@ -474,6 +474,7 @@ def load_tud(filename):

label_names = {'node_labels': [], 'node_attrs': [],
'edge_labels': [], 'edge_attrs': []}
class_label_map = None
class_label_map_strings = []
content_rm = open(frm).read().splitlines()
i = 0
@@ -538,6 +539,7 @@ def load_tud(filename):
else:
label_names = {'node_labels': [], 'node_attrs': [],
'edge_labels': [], 'edge_attrs': []}
class_label_map = None

content_gi = open(fgi).read().splitlines() # graph indicator
content_am = open(fam).read().splitlines() # adjacency matrix
@@ -549,7 +551,7 @@ def load_tud(filename):
elif 'fga' in locals():
content_targets = open(fga).read().splitlines() # targets (regression)
targets = [int(i) for i in content_targets]
if 'class_label_map' in locals():
if class_label_map is not None:
targets = [class_label_map[t] for t in targets]
else:
raise Exception('Can not find targets file. Please make sure there is a "', ds_name, '_graph_labels.txt" or "', ds_name, '_graph_attributes.txt"', 'file in your dataset folder.')
@@ -562,7 +564,7 @@ def load_tud(filename):
# transfer to int first in case of unexpected blanks
data[int(line) - 1].add_node(idx)
labels = [l.strip() for l in content_nl[idx].split(',')]
if label_names['node_labels'] == []:
if label_names['node_labels'] == []: # @todo: need fix bug.
for i, label in enumerate(labels):
l_name = 'label_' + str(i)
data[int(line) - 1].nodes[idx][l_name] = label


Loading…
Cancel
Save