You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

util.py 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Mar 31 17:06:22 2020
  5. @author: ljia
  6. """
  7. import numpy as np
  8. from itertools import combinations
  9. import multiprocessing
  10. from multiprocessing import Pool
  11. from functools import partial
  12. import sys
  13. from tqdm import tqdm
  14. import networkx as nx
  15. from gklearn.gedlib import librariesImport, gedlibpy
  def compute_ged(g1, g2, options):
      """Compute the graph edit distance between two graphs.

      A fresh ``gedlibpy.GEDEnv`` is built for this single pair, configured
      from ``options``, and then run through the shared ``_compute_ged``
      helper so that the single-pair and the batch code paths post-process
      the node maps in exactly the same way.

      Parameters
      ----------
      g1, g2 : networkx graphs
          The two graphs to compare. They are added to the environment in
          this order.
      options : dict
          Must contain ``'edit_cost'``, ``'edit_cost_constants'`` and
          ``'method'``. The whole dict is also passed to
          ``ged_options_to_string`` to build the method option string.

      Returns
      -------
      dis
          The upper bound reported by the environment for the pair; it is
          used as the distance.
      pi_forward : list
          One entry per element of the environment's forward map: the
          matched node of ``g2``, or ``np.inf`` when the mapped index is not
          a valid node index of ``g2``.
      pi_backward : list
          Same as ``pi_forward`` for the backward map, with nodes of ``g1``.
      """
      ged_env = gedlibpy.GEDEnv()
      ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
      ged_env.add_nx_graph(g1, '')
      ged_env.add_nx_graph(g2, '')
      graph_ids = ged_env.get_all_graph_ids()
      ged_env.init()
      ged_env.set_method(options['method'], ged_options_to_string(options))
      ged_env.init_method()

      # Running the method, reading the maps and relabelling them is
      # identical to what the batch path does, so reuse that helper.
      return _compute_ged(ged_env, graph_ids[0], graph_ids[1], g1, g2)
  def compute_geds(graphs, options=None, parallel=False):
      """Compute pairwise graph edit distances for a list of graphs.

      Parameters
      ----------
      graphs : list of networkx graphs
          Every unordered pair ``(i, j)`` with ``i < j`` is evaluated.
      options : dict
          Must contain ``'edit_cost'``, ``'edit_cost_constants'``,
          ``'method'``, ``'node_labels'``, ``'edge_labels'``,
          ``'node_attrs'`` and ``'edge_attrs'``. The dict is copied, so the
          caller's object is never modified.
      parallel : bool
          When true, pairs are distributed over a process pool with one
          worker per CPU and the method is forced to ``threads = 1``.

      Returns
      -------
      ged_vec : list
          Distances of the pairs in the order produced by
          ``itertools.combinations(range(len(graphs)), 2)``.
      ged_mat : numpy.ndarray
          Symmetric ``len(graphs) x len(graphs)`` matrix of the same
          distances; entries that are never assigned (the diagonal) stay 0.
      n_edit_operations : list
          Result of ``get_nb_edit_operations`` for each pair, aligned with
          ``ged_vec``.
      """
      # Work on a private copy: the parallel path overrides 'threads' and
      # that must not leak into the caller's dict (or a shared default).
      options = dict(options) if options is not None else {}
      n_graphs = len(graphs)

      # initialize ged env.
      ged_env = gedlibpy.GEDEnv()
      ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
      for g in graphs:
          ged_env.add_nx_graph(g, '')
      listID = ged_env.get_all_graph_ids()
      ged_env.init()
      if parallel:
          options['threads'] = 1
      ged_env.set_method(options['method'], ged_options_to_string(options))
      ged_env.init_method()

      # compute ged.
      neo_options = {'edit_cost': options['edit_cost'],
                     'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
                     'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
      ged_mat = np.zeros((n_graphs, n_graphs))
      if parallel:
          # Number of unordered pairs; integer arithmetic keeps it exact.
          len_itr = n_graphs * (n_graphs - 1) // 2
          ged_vec = [0] * len_itr
          n_edit_operations = [0] * len_itr
          itr = combinations(range(n_graphs), 2)
          n_jobs = multiprocessing.cpu_count()
          if len_itr < 100 * n_jobs:
              chunksize = len_itr // n_jobs + 1
          else:
              chunksize = 100

          # NOTE(review): this initializer is defined locally; with the
          # 'spawn' start method the pool would have to pickle it, which is
          # expected to fail for nested functions - confirm that only
          # 'fork' is used where this runs.
          def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
              global G_graphs, G_ged_env, G_listID
              G_graphs = graphs_toshare
              G_ged_env = ged_env_toshare
              G_listID = listID_toshare

          do_partial = partial(_wrapper_compute_ged_parallel, neo_options)
          pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
          try:
              iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                              desc='computing GEDs', file=sys.stdout)
              for i, j, dis, n_eo_tmp in iterator:
                  # Results arrive in arbitrary order; map (i, j) back to its
                  # position in the lexicographic list of pairs.
                  idx_itr = n_graphs * i + j - (i + 1) * (i + 2) // 2
                  ged_vec[idx_itr] = dis
                  ged_mat[i][j] = dis
                  ged_mat[j][i] = dis
                  n_edit_operations[idx_itr] = n_eo_tmp
          except BaseException:
              # Do not leave worker processes behind when a task fails or
              # the run is interrupted.
              pool.terminate()
              raise
          else:
              pool.close()
          finally:
              pool.join()
      else:
          ged_vec = []
          n_edit_operations = []
          for i in tqdm(range(n_graphs), desc='computing GEDs', file=sys.stdout):
              for j in range(i + 1, n_graphs):
                  dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
                  ged_vec.append(dis)
                  ged_mat[i][j] = dis
                  ged_mat[j][i] = dis
                  n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
                  n_edit_operations.append(n_eo_tmp)

      return ged_vec, ged_mat, n_edit_operations
  def _wrapper_compute_ged_parallel(options, itr):
      """Pool task: evaluate one graph pair identified by its indices.

      The graphs, the GED environment and the graph ids are read from the
      module-level globals ``G_graphs``, ``G_ged_env`` and ``G_listID``.

      Parameters
      ----------
      options : dict
          Keyword options forwarded to ``_compute_ged_parallel``.
      itr : sequence
          ``itr[0]`` and ``itr[1]`` are the indices of the two graphs.

      Returns
      -------
      tuple
          ``(i, j, dis, n_eo_tmp)``: the two indices followed by the two
          values returned by ``_compute_ged_parallel``.
      """
      i, j = itr[0], itr[1]
      dis, n_eo_tmp = _compute_ged_parallel(
          G_ged_env, G_listID[i], G_listID[j], G_graphs[i], G_graphs[j], options)
      return i, j, dis, n_eo_tmp
  def _compute_ged_parallel(env, gid1, gid2, g1, g2, options):
      """Compute the distance and the edit-operation summary for one pair.

      Parameters
      ----------
      env
          Initialised GED environment, passed through to ``_compute_ged``.
      gid1, gid2
          Ids of the two graphs inside ``env``.
      g1, g2
          The graphs themselves.
      options : dict
          Expanded as keyword arguments of ``get_nb_edit_operations``.

      Returns
      -------
      tuple
          ``(dis, n_eo_tmp)``: the distance from ``_compute_ged`` and the
          value returned by ``get_nb_edit_operations`` for the node maps.
      """
      distance, fwd_map, bwd_map = _compute_ged(env, gid1, gid2, g1, g2)
      edit_ops = get_nb_edit_operations(g1, g2, fwd_map, bwd_map, **options)
      return distance, edit_ops
  def _compute_ged(env, gid1, gid2, g1, g2):
      """Run the configured method on one pair and relabel the node maps.

      Parameters
      ----------
      env
          GED environment on which ``run_method`` is called.
      gid1, gid2
          Ids of the two graphs inside ``env``.
      g1, g2
          The graphs, used to translate mapped indices into node names.

      Returns
      -------
      dis
          The upper bound reported by ``env`` for the pair.
      pi_forward : list
          For each entry of the forward map, the node of ``g2`` at that
          index, or ``np.inf`` when the index is not below the number of
          nodes of ``g2``.
      pi_backward : list
          Same for the backward map, with the nodes of ``g1``.
      """
      env.run_method(gid1, gid2)
      raw_forward = env.get_forward_map(gid1, gid2)
      raw_backward = env.get_backward_map(gid1, gid2)
      dis = env.get_upper_bound(gid1, gid2)

      # Translate positional indices into node names; an index outside the
      # target graph is recorded as np.inf.
      g1_nodes = list(g1.nodes())
      g2_nodes = list(g2.nodes())
      size1 = nx.number_of_nodes(g1)
      size2 = nx.number_of_nodes(g2)

      pi_forward = []
      for idx in raw_forward:
          pi_forward.append(g2_nodes[idx] if idx < size2 else np.inf)

      pi_backward = []
      for idx in raw_backward:
          pi_backward.append(g1_nodes[idx] if idx < size1 else np.inf)

      return dis, pi_forward, pi_backward
  def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, **kwargs):
      """Dispatch to the edit-operation counter matching ``edit_cost``.

      Parameters
      ----------
      g1, g2
          The two graphs.
      forward_map, backward_map
          Node maps between the graphs, passed through unchanged.
      edit_cost : str, optional
          ``'LETTER'`` / ``'LETTER2'`` select the letter counter,
          ``'NON_SYMBOLIC'`` the non-symbolic one, ``'CONSTANT'`` the
          symbolic one with labels; any other value selects the symbolic
          counter without labels.
      **kwargs
          ``node_attrs`` / ``edge_attrs`` are used for ``'NON_SYMBOLIC'``,
          ``node_labels`` / ``edge_labels`` for ``'CONSTANT'``; each
          defaults to an empty list. Other entries are ignored.

      Returns
      -------
      tuple
          Whatever the selected counter returns. Note that the layout of
          the tuple depends on the counter that was selected.
      """
      if edit_cost in ('LETTER', 'LETTER2'):
          return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)

      if edit_cost == 'NON_SYMBOLIC':
          return get_nb_edit_operations_nonsymbolic(
              g1, g2, forward_map, backward_map,
              node_attrs=kwargs.get('node_attrs', []),
              edge_attrs=kwargs.get('edge_attrs', []))

      if edit_cost == 'CONSTANT':
          return get_nb_edit_operations_symbolic(
              g1, g2, forward_map, backward_map,
              node_labels=kwargs.get('node_labels', []),
              edge_labels=kwargs.get('edge_labels', []))

      # Fallback for every other edit cost: symbolic counting, no labels.
      return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
  def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
                                      node_labels=None, edge_labels=None):
      """Compute the number of each edit operations for symbolic-labeled graphs.

      Parameters
      ----------
      g1, g2 : networkx graphs
      forward_map : list
          Entry ``i`` is the node of ``g2`` matched to the ``i``-th node of
          ``g1`` (in ``g1.nodes()`` order), or ``np.inf`` if that node is
          removed.
      backward_map : list
          Entries equal to ``np.inf`` are counted as inserted nodes.
      node_labels, edge_labels : list of str, optional
          Attribute names compared to detect substitutions. ``None`` (the
          default) means no attribute is compared.

      Returns
      -------
      tuple
          ``(n_vi, n_vr, n_vs, n_ei, n_er, n_es)``: node insertions,
          removals and substitutions, then edge insertions, removals and
          substitutions. A substitution is counted when at least one of the
          given labels differs between the matched elements.
      """
      node_labels = [] if node_labels is None else node_labels
      edge_labels = [] if edge_labels is None else edge_labels

      nodes1 = list(g1.nodes())

      # Nodes: removed when mapped to np.inf, substituted when a label differs.
      n_vr = 0
      n_vs = 0
      for i, map_i in enumerate(forward_map):
          if map_i == np.inf:
              n_vr += 1
          elif any(g1.nodes[nodes1[i]][nl] != g2.nodes[map_i][nl] for nl in node_labels):
              n_vs += 1

      n_vi = sum(1 for map_i in backward_map if map_i == np.inf)

      # Position of every g1 node, built once instead of calling
      # list.index() for both endpoints of every edge.
      position = {node: idx for idx, node in enumerate(nodes1)}
      edges2 = g2.edges()

      n_er = 0
      n_es = 0
      nb_edges2_cnted = 0
      for n1, n2 in g1.edges():
          m1 = forward_map[position[n1]]
          m2 = forward_map[position[n2]]
          # one of the nodes is removed, thus the edge is removed.
          if m1 == np.inf or m2 == np.inf:
              n_er += 1
              continue
          # corresponding edge is in g2, in either orientation.
          if (m1, m2) in edges2:
              matched = (m1, m2)
          elif (m2, m1) in edges2:
              matched = (m2, m1)
          else:
              # corresponding nodes are in g2, however the edge is removed.
              n_er += 1
              continue
          nb_edges2_cnted += 1
          # edge labels are different.
          if any(g2.edges[matched][el] != g1.edges[(n1, n2)][el] for el in edge_labels):
              n_es += 1

      # Every g2 edge that no g1 edge was matched to has to be inserted.
      n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
      return n_vi, n_vr, n_vs, n_ei, n_er, n_es
  def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
      """Compute the number of each edit operations for 'x'/'y' node graphs.

      Parameters
      ----------
      g1, g2 : networkx graphs
          Every matched node must carry ``'x'`` and ``'y'`` attributes that
          can be converted with ``float``.
      forward_map : list
          Entry ``i`` is the node of ``g2`` matched to the ``i``-th node of
          ``g1`` (in ``g1.nodes()`` order), or ``np.inf`` if that node is
          removed.
      backward_map : list
          Entries equal to ``np.inf`` are counted as inserted nodes.

      Returns
      -------
      tuple
          ``(n_vi, n_vr, n_vs, sod_vs, n_ei, n_er)``: node insertions and
          removals, the number of matched nodes, the sum over matched nodes
          of the Euclidean distance between their ``('x', 'y')``
          coordinates, then edge insertions and removals. Edge labels are
          not considered.
      """
      nodes1 = list(g1.nodes())

      n_vr = 0
      n_vs = 0
      sod_vs = 0
      for i, map_i in enumerate(forward_map):
          if map_i == np.inf:
              n_vr += 1
          else:
              # Every matched node counts as a substitution; its cost
              # contribution is the distance between the two positions.
              n_vs += 1
              attrs1 = g1.nodes[nodes1[i]]
              attrs2 = g2.nodes[map_i]
              diff_x = float(attrs1['x']) - float(attrs2['x'])
              diff_y = float(attrs1['y']) - float(attrs2['y'])
              sod_vs += np.sqrt(np.square(diff_x) + np.square(diff_y))

      n_vi = sum(1 for map_i in backward_map if map_i == np.inf)

      # Position of every g1 node, built once instead of calling
      # list.index() for both endpoints of every edge.
      position = {node: idx for idx, node in enumerate(nodes1)}
      edges2 = g2.edges()

      n_er = 0
      nb_edges2_cnted = 0
      for n1, n2 in g1.edges():
          m1 = forward_map[position[n1]]
          m2 = forward_map[position[n2]]
          # one of the nodes is removed, thus the edge is removed.
          if m1 == np.inf or m2 == np.inf:
              n_er += 1
          # corresponding edge is in g2. Edge label is not considered.
          elif (m1, m2) in edges2 or (m2, m1) in edges2:
              nb_edges2_cnted += 1
          # corresponding nodes are in g2, however the edge is removed.
          else:
              n_er += 1

      # Every g2 edge that no g1 edge was matched to has to be inserted.
      n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
      return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er
  def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
                                         node_attrs=None, edge_attrs=None):
      """Compute the number of each edit operations for numeric attributes.

      Parameters
      ----------
      g1, g2 : networkx graphs
      forward_map : list
          Entry ``i`` is the node of ``g2`` matched to the ``i``-th node of
          ``g1`` (in ``g1.nodes()`` order), or ``np.inf`` if that node is
          removed.
      backward_map : list
          Entries equal to ``np.inf`` are counted as inserted nodes.
      node_attrs, edge_attrs : list of str, optional
          Names of attributes convertible with ``float``; they define the
          Euclidean distance between matched nodes / edges. ``None`` (the
          default) means no attribute is used, so every distance is 0.

      Returns
      -------
      tuple
          ``(n_vi, n_vr, sod_vs, n_ei, n_er, sod_es)``: node insertions and
          removals, summed distance over matched nodes, edge insertions and
          removals, summed distance over matched edges.
      """
      node_attrs = [] if node_attrs is None else node_attrs
      edge_attrs = [] if edge_attrs is None else edge_attrs

      nodes1 = list(g1.nodes())

      n_vr = 0
      sod_vs = 0
      for i, map_i in enumerate(forward_map):
          if map_i == np.inf:
              n_vr += 1
          else:
              sum_squares = 0
              for a_name in node_attrs:
                  diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name])
                  sum_squares += np.square(diff)
              sod_vs += np.sqrt(sum_squares)

      n_vi = sum(1 for map_i in backward_map if map_i == np.inf)

      # Position of every g1 node, built once instead of calling
      # list.index() for both endpoints of every edge.
      position = {node: idx for idx, node in enumerate(nodes1)}
      edges2 = g2.edges()

      n_er = 0
      n_es = 0
      sod_es = 0
      for n1, n2 in g1.edges():
          n1_g2 = forward_map[position[n1]]
          n2_g2 = forward_map[position[n2]]
          # one of the nodes is removed, thus the edge is removed.
          if n1_g2 == np.inf or n2_g2 == np.inf:
              n_er += 1
              continue
          # corresponding edge is in g2, in either orientation.
          if (n1_g2, n2_g2) in edges2:
              matched = (n1_g2, n2_g2)
          elif (n2_g2, n1_g2) in edges2:
              matched = (n2_g2, n1_g2)
          else:
              # corresponding nodes are in g2, however the edge is removed.
              n_er += 1
              continue
          n_es += 1
          sum_squares = 0
          for a_name in edge_attrs:
              # The g1 edge is always addressed as (n1, n2), the orientation
              # it was iterated in; the reversed key only exists in g1 when
              # g1 is undirected.
              diff = float(g1.edges[n1, n2][a_name]) - float(g2.edges[matched][a_name])
              sum_squares += np.square(diff)
          sod_es += np.sqrt(sum_squares)

      # Every g2 edge that no g1 edge was matched to has to be inserted.
      n_ei = nx.number_of_edges(g2) - n_es
      return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es
  def ged_options_to_string(options):
      """Build the command-line style option string for a GED method.

      Only the keys listed below are translated; every other entry of
      ``options`` is ignored. Each recognised key becomes
      ``'--<key with underscores replaced by dashes> <str(value)> '`` and the
      pieces are emitted in the iteration order of ``options``.

      Parameters
      ----------
      options : dict
          Option names mapped to their values.

      Returns
      -------
      str
          The option string. It always starts with a single space, so an
          input without recognised keys yields ``' '``.
      """
      recognised = frozenset((
          'initialization_method',
          'initialization_options',
          'lower_bound_method',
          'random_substitution_ratio',
          'initial_solutions',
          'ratio_runs_from_initial_solutions',
          'threads',
          'num_randpost_loops',
          # Emitted as '--max-randpost-retrials'; the flag used to be
          # written without the dash after 'max'.
          'max_randpost_retrials',
          'randpost_penalty',
          'randpost_decay',
          'log',
          'randomness',
      ))
      pieces = [' ']
      for key, val in options.items():
          if key in recognised:
              pieces.append('--' + key.replace('_', '-') + ' ' + str(val) + ' ')
      return ''.join(pieces)

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.