|
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Tue Mar 31 17:06:22 2020
-
- @author: ljia
- """
- import numpy as np
- from itertools import combinations
- import multiprocessing
- from multiprocessing import Pool
- from functools import partial
- import sys
- from tqdm import tqdm
- import networkx as nx
- from gklearn.gedlib import librariesImport, gedlibpy
-
-
def compute_ged(g1, g2, options):
    """Compute the graph edit distance between two graphs.

    A fresh GED environment is created for the pair, configured from
    ``options`` and then run once through the shared ``_compute_ged`` helper
    instead of repeating the run / relabel steps here.

    Parameters
    ----------
    g1, g2 : graph
        The two graphs, added to the environment with ``add_nx_graph``.
    options : dict
        Must provide the keys ``'edit_cost'``, ``'edit_cost_constants'`` and
        ``'method'``. The whole dict is also passed to
        ``ged_options_to_string`` to build the method's option string.

    Returns
    -------
    tuple
        ``(dis, pi_forward, pi_backward)`` as returned by ``_compute_ged``:
        the upper bound reported by the environment and the two node maps
        expressed with the graphs' own node labels, where ``np.inf`` stands
        for an index that is not smaller than the target's node count.
    """
    ged_env = gedlibpy.GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
    ged_env.add_nx_graph(g1, '')
    ged_env.add_nx_graph(g2, '')
    graph_ids = ged_env.get_all_graph_ids()
    ged_env.init()
    ged_env.set_method(options['method'], ged_options_to_string(options))
    ged_env.init_method()

    # The first two ids belong to g1 and g2, in the order they were added.
    return _compute_ged(ged_env, graph_ids[0], graph_ids[1], g1, g2)
-
-
def compute_geds(graphs, options=None, parallel=False):
    """Compute the pairwise graph edit distances of a list of graphs.

    Parameters
    ----------
    graphs : list
        Graphs to compare; each one is added to a single shared GED
        environment with ``add_nx_graph``.
    options : dict, optional
        Must provide the keys ``'edit_cost'``, ``'edit_cost_constants'`` and
        ``'method'``; the whole dict is also passed to
        ``ged_options_to_string``. ``None`` is treated as an empty dict, so
        the required keys are reported missing with ``KeyError``.
    parallel : bool, optional
        When true, the pairs are distributed over a process pool with one
        process per CPU.

    Returns
    -------
    ged_vec : list
        Distances of all pairs ``(i, j)`` with ``i < j``, in the order of
        ``itertools.combinations(range(len(graphs)), 2)``.
    ged_mat : numpy.ndarray
        Symmetric ``len(graphs) x len(graphs)`` matrix of the same
        distances; the diagonal is left at zero.
    n_edit_operations : list
        For each pair, in the same order as ``ged_vec``, the value returned
        by ``get_nb_edit_operations``.
    """
    # Avoid a dict shared between calls as the default value.
    if options is None:
        options = {}

    # initialize ged env.
    ged_env = gedlibpy.GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()
    ged_env.init()
    ged_env.set_method(options['method'], ged_options_to_string(options))
    ged_env.init_method()

    # compute ged.
    n_graphs = len(graphs)
    ged_mat = np.zeros((n_graphs, n_graphs))
    if parallel:
        len_itr = n_graphs * (n_graphs - 1) // 2
        ged_vec = [0] * len_itr
        n_edit_operations = [0] * len_itr
        itr = combinations(range(n_graphs), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = len_itr // n_jobs + 1
        else:
            chunksize = 100

        # NOTE(review): a nested function cannot be pickled, so this
        # initializer presumably relies on the 'fork' start method -- confirm.
        def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
            # Publish the shared objects as module globals of the worker,
            # where _wrapper_compute_ged_parallel reads them.
            global G_graphs, G_ged_env, G_listID
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, options)
        # The context manager terminates the workers if collecting the
        # results raises, instead of leaving the pool open.
        with Pool(processes=n_jobs, initializer=init_worker,
                  initargs=(graphs, ged_env, listID)) as pool:
            iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                            desc='computing GEDs', total=len_itr, file=sys.stdout)
            for i, j, dis, n_eo_tmp in iterator:
                # Position of the pair (i, j), i < j, in combinations order;
                # results arrive unordered, so they are stored by index.
                idx_itr = n_graphs * i + j - (i + 1) * (i + 2) // 2
                ged_vec[idx_itr] = dis
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_edit_operations[idx_itr] = n_eo_tmp
            pool.close()
            pool.join()

    else:
        ged_vec = []
        n_edit_operations = []
        for i in tqdm(range(n_graphs), desc='computing GEDs', file=sys.stdout):
            for j in range(i + 1, n_graphs):
                dis, pi_forward, pi_backward = _compute_ged(
                    ged_env, listID[i], listID[j], graphs[i], graphs[j])
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_edit_operations(
                    graphs[i], graphs[j], pi_forward, pi_backward,
                    edit_cost=options['edit_cost'])
                n_edit_operations.append(n_eo_tmp)

    return ged_vec, ged_mat, n_edit_operations
-
-
def _wrapper_compute_ged_parallel(options, itr):
    """Worker entry point: compute the GED of one pair of graph indices.

    ``itr`` holds the two indices of the pair. The graphs, the GED
    environment and the graph ids are read from the module globals
    ``G_graphs``, ``G_ged_env`` and ``G_listID``. The indices are returned
    together with the distance and the edit-operation counts.
    """
    first = itr[0]
    second = itr[1]
    dis, n_eo_tmp = _compute_ged_parallel(
        G_ged_env,
        G_listID[first],
        G_listID[second],
        G_graphs[first],
        G_graphs[second],
        options,
    )
    return first, second, dis, n_eo_tmp
-
-
def _compute_ged_parallel(env, gid1, gid2, g1, g2, options):
    """Return the distance and the edit-operation counts of one graph pair.

    Runs ``_compute_ged`` on the pair and feeds the resulting node maps to
    ``get_nb_edit_operations`` with ``options['edit_cost']``.
    """
    distance, forward, backward = _compute_ged(env, gid1, gid2, g1, g2)
    counts = get_nb_edit_operations(
        g1, g2, forward, backward, edit_cost=options['edit_cost'])
    return distance, counts
-
-
def _compute_ged(env, gid1, gid2, g1, g2):
    """Run the configured GED method on one pair and relabel its node maps.

    Returns ``(dis, pi_forward, pi_backward)``: the upper bound reported by
    ``env`` and the forward / backward maps in which every index is replaced
    by the corresponding node label of the other graph, or by ``np.inf``
    when the index is not smaller than that graph's number of nodes.
    """
    env.run_method(gid1, gid2)
    raw_forward = env.get_forward_map(gid1, gid2)
    raw_backward = env.get_backward_map(gid1, gid2)
    distance = env.get_upper_bound(gid1, gid2)

    # make the map label correct (label remove map as np.inf)
    labels1 = list(g1.nodes())
    labels2 = list(g2.nodes())
    size1 = len(labels1)
    size2 = len(labels2)
    forward = [labels2[k] if k < size2 else np.inf for k in raw_forward]
    backward = [labels1[k] if k < size1 else np.inf for k in raw_backward]

    return distance, forward, backward
-
-
def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None):
    """Count the edit operations with the counter matching ``edit_cost``.

    ``'LETTER'`` and ``'LETTER2'`` use the letter counter, ``'NON_SYMBOLIC'``
    the non-symbolic counter, and any other value (including ``None``) the
    symbolic counter.
    """
    if edit_cost in ('LETTER', 'LETTER2'):
        counter = get_nb_edit_operations_letter
    elif edit_cost == 'NON_SYMBOLIC':
        counter = get_nb_edit_operations_nonsymbolic
    else:
        counter = get_nb_edit_operations_symbolic
    return counter(g1, g2, forward_map, backward_map)
-
-
def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map):
    """Compute the number of each edit operation for symbolic labels.

    Parameters
    ----------
    g1, g2 : graph
        Graphs whose nodes carry an ``'atom'`` attribute and whose edges
        carry a ``'bond_type'`` attribute.
    forward_map : list
        For the i-th node of ``g1``, the node of ``g2`` it is mapped to, or
        ``np.inf`` when the node is removed.
    backward_map : list
        The reverse map; ``np.inf`` marks a node of ``g2`` that is inserted.

    Returns
    -------
    tuple
        ``(n_vi, n_vr, n_vs, n_ei, n_er, n_es)``: numbers of node
        insertions, removals and substitutions followed by edge insertions,
        removals and substitutions.
    """
    n_vr = 0
    n_vs = 0
    nodes1 = list(g1.nodes())
    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        # ``Graph.node`` no longer exists in NetworkX >= 2.4; use ``nodes``.
        elif g1.nodes[nodes1[i]]['atom'] != g2.nodes[map_i]['atom']:
            n_vs += 1
    n_vi = sum(1 for map_i in backward_map if map_i == np.inf)

    # Position of every g1 node in forward_map, looked up in O(1).
    node_pos = {node: idx for idx, node in enumerate(nodes1)}
    edges2 = g2.edges()

    n_er = 0
    n_es = 0
    nb_edges2_cnted = 0
    for n1, n2 in g1.edges():
        n1_g2 = forward_map[node_pos[n1]]
        n2_g2 = forward_map[node_pos[n2]]
        # one of the nodes is removed, thus the edge is removed.
        if n1_g2 == np.inf or n2_g2 == np.inf:
            n_er += 1
            continue
        # corresponding edge is in g2, in either orientation.
        if (n1_g2, n2_g2) in edges2:
            edge2 = (n1_g2, n2_g2)
        elif (n2_g2, n1_g2) in edges2:
            edge2 = (n2_g2, n1_g2)
        # corresponding nodes are in g2, however the edge is removed.
        else:
            n_er += 1
            continue
        nb_edges2_cnted += 1
        # edge labels are different.
        if g2.edges[edge2]['bond_type'] != g1.edges[(n1, n2)]['bond_type']:
            n_es += 1
    # Every g2 edge that no g1 edge was mapped onto has been inserted.
    n_ei = nx.number_of_edges(g2) - nb_edges2_cnted

    return n_vi, n_vr, n_vs, n_ei, n_er, n_es
-
-
def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
    """Count the edit operations for graphs whose nodes carry x/y positions.

    Returns ``(n_vi, n_vr, n_vs, sod_vs, n_ei, n_er)``: node insertions,
    removals and substitutions, the summed Euclidean distance between the
    ``'x'``/``'y'`` attributes of substituted nodes, then edge insertions
    and removals. Edge labels are not considered.
    """
    removed_nodes = 0
    substituted_nodes = 0
    position_shift = 0

    source_nodes = list(g1.nodes())
    for pos, target in enumerate(forward_map):
        if target == np.inf:
            removed_nodes += 1
            continue
        substituted_nodes += 1
        dx = float(g1.nodes[source_nodes[pos]]['x']) - float(g2.nodes[target]['x'])
        dy = float(g1.nodes[source_nodes[pos]]['y']) - float(g2.nodes[target]['y'])
        position_shift += np.sqrt(np.square(dx) + np.square(dy))

    inserted_nodes = sum(1 for target in backward_map if target == np.inf)

    # Index of each g1 node inside forward_map.
    index_of = {node: pos for pos, node in enumerate(source_nodes)}

    removed_edges = 0
    matched_edges = 0
    for u, v in g1.edges():
        image_u = forward_map[index_of[u]]
        image_v = forward_map[index_of[v]]
        endpoint_removed = image_u == np.inf or image_v == np.inf
        # An edge survives only if both endpoints are kept and g2 has the
        # corresponding edge in either orientation.
        if not endpoint_removed and (
                (image_u, image_v) in g2.edges()
                or (image_v, image_u) in g2.edges()):
            matched_edges += 1
        else:
            removed_edges += 1
    inserted_edges = nx.number_of_edges(g2) - matched_edges

    return (inserted_nodes, removed_nodes, substituted_nodes, position_shift,
            inserted_edges, removed_edges)
-
-
def _attr_distance(attrs1, attrs2, attr_names):
    """Return the Euclidean distance between two attribute dicts over ``attr_names``."""
    sum_squares = 0
    for a_name in attr_names:
        diff = float(attrs1[a_name]) - float(attrs2[a_name])
        sum_squares += np.square(diff)
    return np.sqrt(sum_squares)


def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
    """Compute the number of each edit operation for non-symbolic labels.

    Parameters
    ----------
    g1, g2 : graph
        Graphs with numeric attributes. The attribute names are read from
        ``g1.graph['node_attrs']`` and ``g1.graph['edge_attrs']``.
    forward_map : list
        For the i-th node of ``g1``, the node of ``g2`` it is mapped to, or
        ``np.inf`` when the node is removed.
    backward_map : list
        The reverse map; ``np.inf`` marks a node of ``g2`` that is inserted.

    Returns
    -------
    tuple
        ``(n_vi, n_vr, sod_vs, n_ei, n_er, sod_es)``: node insertions and
        removals, the summed attribute distance of substituted nodes, edge
        insertions and removals, and the summed attribute distance of
        substituted edges.
    """
    n_vr = 0
    sod_vs = 0
    nodes1 = list(g1.nodes())
    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        else:
            sod_vs += _attr_distance(g1.nodes[nodes1[i]], g2.nodes[map_i],
                                     g1.graph['node_attrs'])
    n_vi = sum(1 for map_i in backward_map if map_i == np.inf)

    # Position of every g1 node in forward_map, looked up in O(1).
    node_pos = {node: idx for idx, node in enumerate(nodes1)}
    edges2 = g2.edges()

    n_er = 0
    n_es = 0
    sod_es = 0
    for n1, n2 in g1.edges():
        n1_g2 = forward_map[node_pos[n1]]
        n2_g2 = forward_map[node_pos[n2]]
        # one of the nodes is removed, thus the edge is removed.
        if n1_g2 == np.inf or n2_g2 == np.inf:
            n_er += 1
            continue
        # corresponding edge is in g2, in either orientation.
        if (n1_g2, n2_g2) in edges2:
            edge2 = (n1_g2, n2_g2)
        elif (n2_g2, n1_g2) in edges2:
            edge2 = (n2_g2, n1_g2)
        # corresponding nodes are in g2, however the edge is removed.
        else:
            n_er += 1
            continue
        n_es += 1
        # Edge attributes live in ``edges``, not ``nodes``; the g1 side is
        # always the edge currently being iterated.
        sod_es += _attr_distance(g1.edges[n1, n2], g2.edges[edge2],
                                 g1.graph['edge_attrs'])
    # Every g2 edge that no g1 edge was mapped onto has been inserted.
    n_ei = nx.number_of_edges(g2) - n_es

    return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es
-
-
# Option keys understood by ged_options_to_string and the command-line style
# flag each one is rendered as.
_GED_OPTION_FLAGS = {
    'initialization_method': '--initialization-method',
    'initialization_options': '--initialization-options',
    'lower_bound_method': '--lower-bound-method',
    'random_substitution_ratio': '--random-substitution-ratio',
    'initial_solutions': '--initial-solutions',
    'ratio_runs_from_initial_solutions': '--ratio-runs-from-initial-solutions',
    'threads': '--threads',
    'num_randpost_loops': '--num-randpost-loops',
    'max_randpost_retrials': '--max-randpost-retrials',
    'randpost_penalty': '--randpost-penalty',
    'randpost_decay': '--randpost-decay',
    'log': '--log',
    'randomness': '--randomness',
}


def ged_options_to_string(options):
    """Render the recognised entries of ``options`` as an option string.

    Parameters
    ----------
    options : dict
        Any mapping of option names to values. Keys without an entry in
        ``_GED_OPTION_FLAGS`` (for example ``'method'`` or ``'edit_cost'``)
        are skipped.

    Returns
    -------
    str
        A string that starts with a single space and contains
        ``'--flag value '`` for every recognised key, in the iteration
        order of ``options``. An empty or unrecognised dict yields ``' '``.
    """
    rendered = [
        _GED_OPTION_FLAGS[key] + ' ' + str(val) + ' '
        for key, val in options.items()
        if key in _GED_OPTION_FLAGS
    ]
    return ' ' + ''.join(rendered)
|