Browse Source

Add an option to sort graphs by size when computing GEDs, for the sake of stability.

v0.2.x
jajupmochi 5 years ago
parent
commit
06c35201b2
7 changed files with 166 additions and 69 deletions
  1. +19
    -9
      gklearn/ged/env/node_map.py
  2. +110
    -36
      gklearn/ged/median/median_graph_estimator.py
  3. +2
    -0
      gklearn/ged/median/utils.py
  4. +15
    -9
      gklearn/ged/util/util.py
  5. +7
    -2
      gklearn/preimage/experiments/tools/analyze_results_of_random_edit_costs.py
  6. +12
    -12
      gklearn/preimage/experiments/xp_1nn_init10_trianglerule.py
  7. +1
    -1
      gklearn/preimage/kernel_knn_cv.py

+ 19
- 9
gklearn/ged/env/node_map.py View File

@@ -39,14 +39,6 @@ class NodeMap(object):
return np.inf
def get_forward_map(self):
return self.__forward_map
def get_backward_map(self):
return self.__backward_map
def as_relation(self, relation):
relation.clear()
for i in range(0, len(self.__forward_map)):
@@ -77,4 +69,22 @@ class NodeMap(object):
def induced_cost(self):
return self.__induced_cost
return self.__induced_cost
@property
def forward_map(self):
return self.__forward_map

@forward_map.setter
def forward_map(self, value):
self.__forward_map = value
@property
def backward_map(self):
return self.__backward_map

@backward_map.setter
def backward_map(self, value):
self.__backward_map = value

+ 110
- 36
gklearn/ged/median/median_graph_estimator.py View File

@@ -52,6 +52,7 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
self.__seed = 0
self.__parallel = True
self.__update_order = True
self.__sort_graphs = True # sort graphs by size when computing GEDs.
self.__refine = True
self.__time_limit_in_sec = 0
self.__epsilon = 0.0001
@@ -150,6 +151,16 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
else:
raise Exception('Invalid argument "' + opt_val + '" for option update-order. Usage: options = "[--update-order TRUE|FALSE] [...]"')
elif opt_name == 'sort-graphs':
if opt_val == 'TRUE':
self.__sort_graphs = True
elif opt_val == 'FALSE':
self.__sort_graphs = False
else:
raise Exception('Invalid argument "' + opt_val + '" for option sort-graphs. Usage: options = "[--sort-graphs TRUE|FALSE] [...]"')
elif opt_name == 'refine':
if opt_val == 'TRUE':
self.__refine = True
@@ -537,18 +548,31 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
progress.update(1)
# Improving the node maps.
nb_nodes_median = self.__ged_env.get_graph_num_nodes(self.__gen_median_id)
for graph_id, node_map in self.__node_maps_from_median.items():
if time.expired():
if self.__state == AlgorithmState.TERMINATED:
self.__state = AlgorithmState.CONVERGED
break
self.__ged_env.run_method(self.__gen_median_id, graph_id)
if self.__ged_env.get_upper_bound(self.__gen_median_id, graph_id) < node_map.induced_cost():
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__gen_median_id, graph_id)
self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost()

nb_nodes_g = self.__ged_env.get_graph_num_nodes(graph_id)
if nb_nodes_median <= nb_nodes_g or not self.__sort_graphs:
self.__ged_env.run_method(self.__gen_median_id, graph_id)
if self.__ged_env.get_upper_bound(self.__gen_median_id, graph_id) < node_map.induced_cost():
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__gen_median_id, graph_id)
else:
self.__ged_env.run_method(graph_id, self.__gen_median_id)
if self.__ged_env.get_upper_bound(graph_id, self.__gen_median_id) < node_map.induced_cost():
node_map_tmp = self.__ged_env.get_node_map(graph_id, self.__gen_median_id)
node_map_tmp.forward_map, node_map_tmp.backward_map = node_map_tmp.backward_map, node_map_tmp.forward_map
self.__node_maps_from_median[graph_id] = node_map_tmp
self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost()

# Print information.
if self.__print_to_stdout == 2:
progress.update(1)

self.__sum_of_distances = 0.0
for key, val in self.__node_maps_from_median.items():
self.__sum_of_distances += val.induced_cost()
@@ -636,6 +660,7 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
self.__seed = 0
self.__parallel = True
self.__update_order = True
self.__sort_graphs = True
self.__refine = True
self.__time_limit_in_sec = 0
self.__epsilon = 0.0001
@@ -695,7 +720,7 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
def init_worker(ged_env_toshare):
global G_ged_env
G_ged_env = ged_env_toshare
do_fun = partial(_compute_medoid_parallel, graph_ids)
do_fun = partial(_compute_medoid_parallel, graph_ids, self.__sort_graphs)
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(self.__ged_env,))
if self.__print_to_stdout == 2:
iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize),
@@ -723,10 +748,16 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
if timer.expired():
self.__state = AlgorithmState.CALLED
break
nb_nodes_g = self.__ged_env.get_graph_num_nodes(g_id)
sum_of_distances = 0
for h_id in graph_ids:
self.__ged_env.run_method(g_id, h_id)
sum_of_distances += self.__ged_env.get_upper_bound(g_id, h_id)
for h_id in graph_ids:
nb_nodes_h = self.__ged_env.get_graph_num_nodes(h_id)
if nb_nodes_g <= nb_nodes_h or not self.__sort_graphs:
self.__ged_env.run_method(g_id, h_id)
sum_of_distances += self.__ged_env.get_upper_bound(g_id, h_id)
else:
self.__ged_env.run_method(h_id, g_id)
sum_of_distances += self.__ged_env.get_upper_bound(h_id, g_id)
if sum_of_distances < best_sum_of_distances:
best_sum_of_distances = sum_of_distances
medoid_id = g_id
@@ -760,7 +791,8 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
def init_worker(ged_env_toshare):
global G_ged_env
G_ged_env = ged_env_toshare
do_fun = partial(_compute_init_node_maps_parallel, gen_median_id)
nb_nodes_median = self.__ged_env.get_graph_num_nodes(gen_median_id)
do_fun = partial(_compute_init_node_maps_parallel, gen_median_id, self.__sort_graphs, nb_nodes_median)
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(self.__ged_env,))
if self.__print_to_stdout == 2:
iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize),
@@ -783,9 +815,17 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
self.__sum_of_distances = 0
self.__node_maps_from_median.clear()
nb_nodes_median = self.__ged_env.get_graph_num_nodes(gen_median_id)
for graph_id in graph_ids:
self.__ged_env.run_method(gen_median_id, graph_id)
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id)
nb_nodes_g = self.__ged_env.get_graph_num_nodes(graph_id)
if nb_nodes_median <= nb_nodes_g or not self.__sort_graphs:
self.__ged_env.run_method(gen_median_id, graph_id)
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id)
else:
self.__ged_env.run_method(graph_id, gen_median_id)
node_map_tmp = self.__ged_env.get_node_map(graph_id, gen_median_id)
node_map_tmp.forward_map, node_map_tmp.backward_map = node_map_tmp.backward_map, node_map_tmp.forward_map
self.__node_maps_from_median[graph_id] = node_map_tmp
# print(self.__node_maps_from_median[graph_id])
self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost()
# print(self.__sum_of_distances)
@@ -843,7 +883,7 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
for graph_id, graph in graphs.items():
# print('graph_id: ', graph_id)
# print(self.__node_maps_from_median[graph_id])
# print(self.__node_maps_from_median[graph_id].get_forward_map(), self.__node_maps_from_median[graph_id].get_backward_map())
# print(self.__node_maps_from_median[graph_id].forward_map, self.__node_maps_from_median[graph_id].backward_map)
k = self.__node_maps_from_median[graph_id].image(i)
# print('k: ', k)
if k != np.inf:
@@ -920,7 +960,8 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
def init_worker(ged_env_toshare):
global G_ged_env
G_ged_env = ged_env_toshare
do_fun = partial(_update_node_maps_parallel, self.__median_id, self.__epsilon)
nb_nodes_median = self.__ged_env.get_graph_num_nodes(self.__median_id)
do_fun = partial(_update_node_maps_parallel, self.__median_id, self.__epsilon, self.__sort_graphs, nb_nodes_median)
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(self.__ged_env,))
if self.__print_to_stdout == 2:
iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize),
@@ -941,13 +982,25 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
progress = tqdm(desc='Updating node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
node_maps_were_modified = False
nb_nodes_median = self.__ged_env.get_graph_num_nodes(self.__median_id)
for graph_id, node_map in self.__node_maps_from_median.items():
self.__ged_env.run_method(self.__median_id, graph_id)
if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < node_map.induced_cost() - self.__epsilon:
# xxx = self.__node_maps_from_median[graph_id]
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id)
# yyy = self.__node_maps_from_median[graph_id]
node_maps_were_modified = True
nb_nodes_g = self.__ged_env.get_graph_num_nodes(graph_id)
if nb_nodes_median <= nb_nodes_g or not self.__sort_graphs:
self.__ged_env.run_method(self.__median_id, graph_id)
if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < node_map.induced_cost() - self.__epsilon:
# xxx = self.__node_maps_from_median[graph_id]
self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id)
node_maps_were_modified = True
else:
self.__ged_env.run_method(graph_id, self.__median_id)
if self.__ged_env.get_upper_bound(graph_id, self.__median_id) < node_map.induced_cost() - self.__epsilon:
node_map_tmp = self.__ged_env.get_node_map(graph_id, self.__median_id)
node_map_tmp.forward_map, node_map_tmp.backward_map = node_map_tmp.backward_map, node_map_tmp.forward_map
self.__node_maps_from_median[graph_id] = node_map_tmp
node_maps_were_modified = True
# Print information about current iteration.
if self.__print_to_stdout == 2:
progress.update(1)
@@ -1047,8 +1100,8 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
for k in range(0, node_map.num_target_nodes()):
if is_unassigned_target_node[k]:
new_node_map.add_assignment(np.inf, k)
# print(self.__node_maps_from_median[key].get_forward_map(), self.__node_maps_from_median[key].get_backward_map())
# print(new_node_map.get_forward_map(), new_node_map.get_backward_map())
# print(self.__node_maps_from_median[key].forward_map, self.__node_maps_from_median[key].backward_map)
# print(new_node_map.forward_map, new_node_map.backward_map
self.__node_maps_from_median[key] = new_node_map
# Increase overall number of decreases.
@@ -1599,37 +1652,58 @@ class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined no
# return median_label


def _compute_medoid_parallel(graph_ids, itr):
def _compute_medoid_parallel(graph_ids, sort, itr):
g_id = itr[0]
i = itr[1]
# @todo: timer not considered here.
# if timer.expired():
# self.__state = AlgorithmState.CALLED
# break
nb_nodes_g = G_ged_env.get_graph_num_nodes(g_id)
sum_of_distances = 0
for h_id in graph_ids:
G_ged_env.run_method(g_id, h_id)
sum_of_distances += G_ged_env.get_upper_bound(g_id, h_id)
nb_nodes_h = G_ged_env.get_graph_num_nodes(h_id)
if nb_nodes_g <= nb_nodes_h or not sort:
G_ged_env.run_method(g_id, h_id)
sum_of_distances += G_ged_env.get_upper_bound(g_id, h_id)
else:
G_ged_env.run_method(h_id, g_id)
sum_of_distances += G_ged_env.get_upper_bound(h_id, g_id)
return i, sum_of_distances


def _compute_init_node_maps_parallel(gen_median_id, itr):
def _compute_init_node_maps_parallel(gen_median_id, sort, nb_nodes_median, itr):
graph_id = itr
G_ged_env.run_method(gen_median_id, graph_id)
node_maps_from_median = G_ged_env.get_node_map(gen_median_id, graph_id)
nb_nodes_g = G_ged_env.get_graph_num_nodes(graph_id)
if nb_nodes_median <= nb_nodes_g or not sort:
G_ged_env.run_method(gen_median_id, graph_id)
node_map = G_ged_env.get_node_map(gen_median_id, graph_id)
# print(self.__node_maps_from_median[graph_id])
sum_of_distance = node_maps_from_median.induced_cost()
else:
G_ged_env.run_method(graph_id, gen_median_id)
node_map = G_ged_env.get_node_map(graph_id, gen_median_id)
node_map.forward_map, node_map.backward_map = node_map.backward_map, node_map.forward_map
sum_of_distance = node_map.induced_cost()
# print(self.__sum_of_distances)
return graph_id, sum_of_distance, node_maps_from_median

return graph_id, sum_of_distance, node_map

def _update_node_maps_parallel(median_id, epsilon, itr):
def _update_node_maps_parallel(median_id, epsilon, sort, nb_nodes_median, itr):
graph_id = itr[0]
node_map = itr[1]

node_maps_were_modified = False
G_ged_env.run_method(median_id, graph_id)
if G_ged_env.get_upper_bound(median_id, graph_id) < node_map.induced_cost() - epsilon:
node_map = G_ged_env.get_node_map(median_id, graph_id)
node_maps_were_modified = True
nb_nodes_g = G_ged_env.get_graph_num_nodes(graph_id)
if nb_nodes_median <= nb_nodes_g or not sort:
G_ged_env.run_method(median_id, graph_id)
if G_ged_env.get_upper_bound(median_id, graph_id) < node_map.induced_cost() - epsilon:
node_map = G_ged_env.get_node_map(median_id, graph_id)
node_maps_were_modified = True
else:
G_ged_env.run_method(graph_id, median_id)
if G_ged_env.get_upper_bound(graph_id, median_id) < node_map.induced_cost() - epsilon:
node_map = G_ged_env.get_node_map(graph_id, median_id)
node_map.forward_map, node_map.backward_map = node_map.backward_map, node_map.forward_map
node_maps_were_modified = True
return graph_id, node_map, node_maps_were_modified

+ 2
- 0
gklearn/ged/median/utils.py View File

@@ -34,6 +34,8 @@ def mge_options_to_string(options):
opt_str += '--parallel ' + ('TRUE' if val else 'FALSE') + ' '
elif key == 'update_order':
opt_str += '--update-order ' + ('TRUE' if val else 'FALSE') + ' '
elif key == 'sort_graphs':
opt_str += '--sort-graphs ' + ('TRUE' if val else 'FALSE') + ' '
elif key == 'refine':
opt_str += '--refine ' + ('TRUE' if val else 'FALSE') + ' '
elif key == 'time_limit':


+ 15
- 9
gklearn/ged/util/util.py View File

@@ -46,7 +46,7 @@ def compute_ged(g1, g2, options):
return dis, pi_forward, pi_backward


def compute_geds(graphs, options={}, parallel=False, verbose=True):
def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True):
# initialize ged env.
ged_env = gedlibpy.GEDEnv()
ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
@@ -79,7 +79,7 @@ def compute_geds(graphs, options={}, parallel=False, verbose=True):
G_graphs = graphs_toshare
G_ged_env = ged_env_toshare
G_listID = listID_toshare
do_partial = partial(_wrapper_compute_ged_parallel, neo_options)
do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
if verbose:
iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
@@ -108,25 +108,31 @@ def compute_geds(graphs, options={}, parallel=False, verbose=True):
for i in iterator:
# for i in range(len(graphs)):
for j in range(i + 1, len(graphs)):
dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
else:
dis, pi_backward, pi_forward = _compute_ged(ged_env, listID[j], listID[i], graphs[j], graphs[i])
ged_vec.append(dis)
ged_mat[i][j] = dis
ged_mat[j][i] = dis
n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
n_edit_operations.append(n_eo_tmp)
return ged_vec, ged_mat, n_edit_operations


def _wrapper_compute_ged_parallel(options, itr):
def _wrapper_compute_ged_parallel(options, sort, itr):
i = itr[0]
j = itr[1]
dis, n_eo_tmp = _compute_ged_parallel(G_ged_env, G_listID[i], G_listID[j], G_graphs[i], G_graphs[j], options)
dis, n_eo_tmp = _compute_ged_parallel(G_ged_env, G_listID[i], G_listID[j], G_graphs[i], G_graphs[j], options, sort)
return i, j, dis, n_eo_tmp


def _compute_ged_parallel(env, gid1, gid2, g1, g2, options):
dis, pi_forward, pi_backward = _compute_ged(env, gid1, gid2, g1, g2)
def _compute_ged_parallel(env, gid1, gid2, g1, g2, options, sort):
if nx.number_of_nodes(g1) <= nx.number_of_nodes(g2) or not sort:
dis, pi_forward, pi_backward = _compute_ged(env, gid1, gid2, g1, g2)
else:
dis, pi_backward, pi_forward = _compute_ged(env, gid2, gid1, g2, g1)
n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, **options) # [0,0,0,0,0,0]
return dis, n_eo_tmp



+ 7
- 2
gklearn/preimage/experiments/tools/analyze_results_of_random_edit_costs.py View File

@@ -82,5 +82,10 @@ def compute_for_all_experiments(data_dir):

if __name__ == '__main__':
# data_dir = '../results/xp_median_preimage.update_order/'
data_dir = '../../results/CRIANN/xp_median_preimage.init10/'
compute_for_all_experiments(data_dir)
root_dir_tnz = '../../results/CRIANN/xp_median_preimage.init10/'
root_dir_ntnz = '../../results/CRIANN/xp_median_preimage.init10.no_triangle_rule/'
root_dir_tz = '../../results/CRIANN/xp_median_preimage.init10.triangle_rule.allow_zeros/'
root_dir_ntz = '../../results/CRIANN/xp_median_preimage.init10.no_triangle_rule.allow_zeros/'
data_dirs = [root_dir_tnz, root_dir_ntnz, root_dir_tz, root_dir_ntz]
for data_dir in data_dirs:
compute_for_all_experiments(data_dir)

+ 12
- 12
gklearn/preimage/experiments/xp_1nn_init10_trianglerule.py View File

@@ -2958,18 +2958,6 @@ if __name__ == "__main__":
# #### xp 7_1: MUTAG, StructuralSP, using CONSTANT.
xp_median_preimage_7_1()

# #### xp 8_2: Monoterpenoides, PathUpToH, using CONSTANT.
xp_median_preimage_8_2()

# #### xp 8_3: Monoterpenoides, Treelet, using CONSTANT.
xp_median_preimage_8_3()

# #### xp 8_4: Monoterpenoides, WeisfeilerLehman, using CONSTANT.
xp_median_preimage_8_4()
# #### xp 8_1: Monoterpenoides, StructuralSP, using CONSTANT.
xp_median_preimage_8_1()

# #### xp 9_2: MAO, PathUpToH, using CONSTANT, symbolic only.
xp_median_preimage_9_2()

@@ -2999,6 +2987,18 @@ if __name__ == "__main__":
# #### xp 6_1: COIL-RAG, StructuralSP, using NON_SYMBOLIC.
xp_median_preimage_6_1()
# #### xp 8_2: Monoterpenoides, PathUpToH, using CONSTANT.
xp_median_preimage_8_2()

# #### xp 8_3: Monoterpenoides, Treelet, using CONSTANT.
xp_median_preimage_8_3()

# #### xp 8_4: Monoterpenoides, WeisfeilerLehman, using CONSTANT.
xp_median_preimage_8_4()
# #### xp 8_1: Monoterpenoides, StructuralSP, using CONSTANT.
xp_median_preimage_8_1()
#
# #### xp 2_1: COIL-DEL, StructuralSP, using LETTER2, only node attrs.
xp_median_preimage_2_1()


+ 1
- 1
gklearn/preimage/kernel_knn_cv.py View File

@@ -151,7 +151,7 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
# write result summary for each letter.
if save_results:
f_summary = open(dir_save + fn_output_summary, 'a')
for i, median_type in enumerate('set-median', 'gen median', 'gen median uo'):
for i, median_type in enumerate(['set-median', 'gen median', 'gen median uo']):
csv.writer(f_summary).writerow([ds_name, kernel_options['name'],
train_examples + ': ' + median_type,
knn_options['n_neighbors'],


Loading…
Cancel
Save