Browse Source

1. Fix bugs in the function that computes the numbers of edit operations for non-symbolic labels.

2. Fix bugs in load_tud.
v0.2.x
jajupmochi 5 years ago
parent
commit
45052568eb
4 changed files with 28 additions and 15 deletions
  1. +18
    -12
      gklearn/ged/util/util.py
  2. +4
    -0
      gklearn/preimage/median_preimage_generator.py
  3. +2
    -1
      gklearn/utils/dataset.py
  4. +4
    -2
      gklearn/utils/graph_files.py

+ 18
- 12
gklearn/ged/util/util.py View File

@@ -57,7 +57,9 @@ def compute_geds(graphs, options={}, parallel=False):
ged_env.set_method(options['method'], ged_options_to_string(options))
ged_env.init_method()

# compute ged.
# compute ged.
neo_options = {'edit_cost': options['edit_cost'],
'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
ged_mat = np.zeros((len(graphs), len(graphs)))
if parallel:
len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
@@ -74,7 +76,7 @@ def compute_geds(graphs, options={}, parallel=False):
G_graphs = graphs_toshare
G_ged_env = ged_env_toshare
G_listID = listID_toshare
do_partial = partial(_wrapper_compute_ged_parallel, options)
do_partial = partial(_wrapper_compute_ged_parallel, neo_options)
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
desc='computing GEDs', file=sys.stdout)
@@ -100,7 +102,7 @@ def compute_geds(graphs, options={}, parallel=False):
ged_vec.append(dis)
ged_mat[i][j] = dis
ged_mat[j][i] = dis
n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, edit_cost=options['edit_cost'])
n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
n_edit_operations.append(n_eo_tmp)
return ged_vec, ged_mat, n_edit_operations
@@ -115,7 +117,7 @@ def _wrapper_compute_ged_parallel(options, itr):

def _compute_ged_parallel(env, gid1, gid2, g1, g2, options):
dis, pi_forward, pi_backward = _compute_ged(env, gid1, gid2, g1, g2)
n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, edit_cost=options['edit_cost']) # [0,0,0,0,0,0]
n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, **options) # [0,0,0,0,0,0]
return dis, n_eo_tmp


@@ -137,11 +139,14 @@ def _compute_ged(env, gid1, gid2, g1, g2):
return dis, pi_forward, pi_backward


def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, **kwargs):
    """Count the edit operations implied by a node mapping between two graphs.

    Dispatches to the cost-model-specific counter selected by *edit_cost*.

    Parameters
    ----------
    g1, g2 :
        The two graphs being compared (networkx-style graphs — TODO confirm).
    forward_map, backward_map :
        Node maps from the GED computation (g1 -> g2 and g2 -> g1).
    edit_cost : str, optional
        Name of the edit cost model: 'LETTER', 'LETTER2', 'NON_SYMBOLIC',
        or anything else (treated as symbolic).
    **kwargs :
        Extra options. For 'NON_SYMBOLIC', 'node_attrs' and 'edge_attrs'
        list the numeric attribute names to compare; passing them here
        (instead of reading g1.graph['node_attrs']) fixes the bug where
        those graph-level keys may be unset.

    Returns
    -------
    tuple
        Edit-operation counts; the exact layout depends on the cost model.
    """
    if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
        return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)
    elif edit_cost == 'NON_SYMBOLIC':
        # Attribute name lists default to empty so the substitution-cost
        # sums are simply 0 when no numeric attributes are declared.
        node_attrs = kwargs.get('node_attrs', [])
        edge_attrs = kwargs.get('edge_attrs', [])
        return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
                                                  node_attrs=node_attrs,
                                                  edge_attrs=edge_attrs)
    else:
        return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
@@ -242,7 +247,8 @@ def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er


def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
node_attrs=[], edge_attrs=[]):
"""Compute the number of each edit operations.
"""
n_vi = 0
@@ -261,7 +267,7 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
else:
n_vs += 1
sum_squares = 0
for a_name in g1.graph['node_attrs']:
for a_name in node_attrs:
diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name])
sum_squares += np.square(diff)
sod_vs += np.sqrt(sum_squares)
@@ -284,15 +290,15 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
elif (n1_g2, n2_g2) in g2.edges():
n_es += 1
sum_squares = 0
for a_name in g1.graph['edge_attrs']:
diff = float(g1.edges[n1, n2][a_name]) - float(g2.nodes[n1_g2, n2_g2][a_name])
for a_name in edge_attrs:
diff = float(g1.edges[n1, n2][a_name]) - float(g2.edges[n1_g2, n2_g2][a_name])
sum_squares += np.square(diff)
sod_es += np.sqrt(sum_squares)
elif (n2_g2, n1_g2) in g2.edges():
n_es += 1
sum_squares = 0
for a_name in g1.graph['edge_attrs']:
diff = float(g1.edges[n2, n1][a_name]) - float(g2.nodes[n2_g2, n1_g2][a_name])
for a_name in edge_attrs:
diff = float(g1.edges[n2, n1][a_name]) - float(g2.edges[n2_g2, n1_g2][a_name])
sum_squares += np.square(diff)
sod_es += np.sqrt(sum_squares)
# corresponding nodes are in g2, however the edge is removed.


+ 4
- 0
gklearn/preimage/median_preimage_generator.py View File

@@ -262,6 +262,8 @@ class MedianPreimageGenerator(PreimageGenerator):
self.__edit_cost_constants = self.__init_ecc
options = self.__ged_options.copy()
options['edit_cost_constants'] = self.__edit_cost_constants # @todo
options['node_attrs'] = self._dataset.node_attrs
options['edge_attrs'] = self._dataset.edge_attrs
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel)
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
time_list = [time.time() - time0]
@@ -300,6 +302,8 @@ class MedianPreimageGenerator(PreimageGenerator):
# compute new GEDs and numbers of edit operations.
options = self.__ged_options.copy() # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75])
options['edit_cost_constants'] = self.__edit_cost_constants # @todo
options['node_attrs'] = self._dataset.node_attrs
options['edge_attrs'] = self._dataset.edge_attrs
ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel)
residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
time_list.append(time.time() - time0)


+ 2
- 1
gklearn/utils/dataset.py View File

@@ -90,7 +90,8 @@ class Dataset(object):
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'COIL-RAG':
pass
ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'COLORS-3':
ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)


+ 4
- 2
gklearn/utils/graph_files.py View File

@@ -474,6 +474,7 @@ def load_tud(filename):

label_names = {'node_labels': [], 'node_attrs': [],
'edge_labels': [], 'edge_attrs': []}
class_label_map = None
class_label_map_strings = []
content_rm = open(frm).read().splitlines()
i = 0
@@ -538,6 +539,7 @@ def load_tud(filename):
else:
label_names = {'node_labels': [], 'node_attrs': [],
'edge_labels': [], 'edge_attrs': []}
class_label_map = None

content_gi = open(fgi).read().splitlines() # graph indicator
content_am = open(fam).read().splitlines() # adjacency matrix
@@ -549,7 +551,7 @@ def load_tud(filename):
elif 'fga' in locals():
content_targets = open(fga).read().splitlines() # targets (regression)
targets = [int(i) for i in content_targets]
if 'class_label_map' in locals():
if class_label_map is not None:
targets = [class_label_map[t] for t in targets]
else:
raise Exception('Can not find targets file. Please make sure there is a "', ds_name, '_graph_labels.txt" or "', ds_name, '_graph_attributes.txt"', 'file in your dataset folder.')
@@ -562,7 +564,7 @@ def load_tud(filename):
# transfer to int first in case of unexpected blanks
data[int(line) - 1].add_node(idx)
labels = [l.strip() for l in content_nl[idx].split(',')]
if label_names['node_labels'] == []:
if label_names['node_labels'] == []: # @todo: need fix bug.
for i, label in enumerate(labels):
l_name = 'label_' + str(i)
data[int(line) - 1].nodes[idx][l_name] = label


Loading…
Cancel
Save