You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

util.py 23 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Mar 31 17:06:22 2020
  5. @author: ljia
  6. """
  7. import numpy as np
  8. from itertools import combinations
  9. import multiprocessing
  10. from multiprocessing import Pool
  11. from functools import partial
  12. import sys
  13. # from tqdm import tqdm
  14. import networkx as nx
  15. from gklearn.ged.env import GEDEnv
  16. from gklearn.utils import get_iters
  17. def compute_ged(g1, g2, options):
  18. from gklearn.gedlib import librariesImport, gedlibpy
  19. ged_env = gedlibpy.GEDEnv()
  20. ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
  21. ged_env.add_nx_graph(g1, '')
  22. ged_env.add_nx_graph(g2, '')
  23. listID = ged_env.get_all_graph_ids()
  24. ged_env.init(init_type=options['init_option'])
  25. ged_env.set_method(options['method'], ged_options_to_string(options))
  26. ged_env.init_method()
  27. g = listID[0]
  28. h = listID[1]
  29. ged_env.run_method(g, h)
  30. pi_forward = ged_env.get_forward_map(g, h)
  31. pi_backward = ged_env.get_backward_map(g, h)
  32. upper = ged_env.get_upper_bound(g, h)
  33. dis = upper
  34. # make the map label correct (label remove map as np.inf)
  35. nodes1 = [n for n in g1.nodes()]
  36. nodes2 = [n for n in g2.nodes()]
  37. nb1 = nx.number_of_nodes(g1)
  38. nb2 = nx.number_of_nodes(g2)
  39. pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
  40. pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
  41. # print(pi_forward)
  42. return dis, pi_forward, pi_backward
  43. def pairwise_ged(g1, g2, options={}, sort=True, repeats=1, parallel=False, verbose=True):
  44. from gklearn.gedlib import librariesImport, gedlibpy
  45. ged_env = gedlibpy.GEDEnv()
  46. ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
  47. ged_env.add_nx_graph(g1, '')
  48. ged_env.add_nx_graph(g2, '')
  49. listID = ged_env.get_all_graph_ids()
  50. ged_env.init(init_option=(options['init_option'] if 'init_option' in options else 'EAGER_WITHOUT_SHUFFLED_COPIES'))
  51. ged_env.set_method(options['method'], ged_options_to_string(options))
  52. ged_env.init_method()
  53. g = listID[0]
  54. h = listID[1]
  55. dis_min = np.inf
  56. for i in range(0, repeats):
  57. ged_env.run_method(g, h)
  58. upper = ged_env.get_upper_bound(g, h)
  59. dis = upper
  60. if dis < dis_min:
  61. dis_min = dis
  62. pi_forward = ged_env.get_forward_map(g, h)
  63. pi_backward = ged_env.get_backward_map(g, h)
  64. # lower = ged_env.get_lower_bound(g, h)
  65. # make the map label correct (label remove map as np.inf)
  66. nodes1 = [n for n in g1.nodes()]
  67. nodes2 = [n for n in g2.nodes()]
  68. nb1 = nx.number_of_nodes(g1)
  69. nb2 = nx.number_of_nodes(g2)
  70. pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
  71. pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
  72. # print(pi_forward)
  73. return dis, pi_forward, pi_backward
  74. def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True):
  75. # initialize ged env.
  76. ged_env = GEDEnv()
  77. ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
  78. for g in graphs:
  79. ged_env.add_nx_graph(g, '')
  80. listID = ged_env.get_all_graph_ids()
  81. node_labels = ged_env.get_all_node_labels()
  82. edge_labels = ged_env.get_all_edge_labels()
  83. node_label_costs = label_costs_to_matrix(options['node_label_costs'], len(node_labels)) if 'node_label_costs' in options else None
  84. edge_label_costs = label_costs_to_matrix(options['edge_label_costs'], len(edge_labels)) if 'edge_label_costs' in options else None
  85. ged_env.set_label_costs(node_label_costs, edge_label_costs)
  86. ged_env.init(init_type=options['init_option'])
  87. if parallel:
  88. options['threads'] = 1
  89. ged_env.set_method(options['method'], options)
  90. ged_env.init_method()
  91. # compute ged.
  92. # options used to compute numbers of edit operations.
  93. if node_label_costs is None and edge_label_costs is None:
  94. neo_options = {'edit_cost': options['edit_cost'],
  95. 'is_cml': False,
  96. 'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
  97. 'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
  98. else:
  99. neo_options = {'edit_cost': options['edit_cost'],
  100. 'is_cml': True,
  101. 'node_labels': node_labels,
  102. 'edge_labels': edge_labels}
  103. ged_mat = np.zeros((len(graphs), len(graphs)))
  104. if parallel:
  105. len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
  106. ged_vec = [0 for i in range(len_itr)]
  107. n_edit_operations = [0 for i in range(len_itr)]
  108. itr = combinations(range(0, len(graphs)), 2)
  109. n_jobs = multiprocessing.cpu_count()
  110. if len_itr < 100 * n_jobs:
  111. chunksize = int(len_itr / n_jobs) + 1
  112. else:
  113. chunksize = 100
  114. def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
  115. global G_graphs, G_ged_env, G_listID
  116. G_graphs = graphs_toshare
  117. G_ged_env = ged_env_toshare
  118. G_listID = listID_toshare
  119. do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
  120. pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
  121. iterator = get_iters(pool.imap_unordered(do_partial, itr, chunksize), desc='computing GEDs', file=sys.stdout, length=len(graphs), verbose=verbose)
  122. # iterator = pool.imap_unordered(do_partial, itr, chunksize)
  123. for i, j, dis, n_eo_tmp in iterator:
  124. idx_itr = int(len(graphs) * i + j - (i + 1) * (i + 2) / 2)
  125. ged_vec[idx_itr] = dis
  126. ged_mat[i][j] = dis
  127. ged_mat[j][i] = dis
  128. n_edit_operations[idx_itr] = n_eo_tmp
  129. # print('\n-------------------------------------------')
  130. # print(i, j, idx_itr, dis)
  131. pool.close()
  132. pool.join()
  133. else:
  134. ged_vec = []
  135. n_edit_operations = []
  136. iterator = get_iters(range(len(graphs)), desc='computing GEDs', file=sys.stdout, length=len(graphs), verbose=verbose)
  137. for i in iterator:
  138. # for i in range(len(graphs)):
  139. for j in range(i + 1, len(graphs)):
  140. if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
  141. dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
  142. else:
  143. dis, pi_backward, pi_forward = _compute_ged(ged_env, listID[j], listID[i], graphs[j], graphs[i])
  144. ged_vec.append(dis)
  145. ged_mat[i][j] = dis
  146. ged_mat[j][i] = dis
  147. n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
  148. n_edit_operations.append(n_eo_tmp)
  149. return ged_vec, ged_mat, n_edit_operations
  150. def compute_geds(graphs, options={}, sort=True, repeats=1, parallel=False, n_jobs=None, verbose=True):
  151. from gklearn.gedlib import librariesImport, gedlibpy
  152. # initialize ged env.
  153. ged_env = gedlibpy.GEDEnv()
  154. ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants'])
  155. for g in graphs:
  156. ged_env.add_nx_graph(g, '')
  157. listID = ged_env.get_all_graph_ids()
  158. ged_env.init()
  159. if parallel:
  160. options['threads'] = 1
  161. ged_env.set_method(options['method'], ged_options_to_string(options))
  162. ged_env.init_method()
  163. # compute ged.
  164. neo_options = {'edit_cost': options['edit_cost'],
  165. 'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
  166. 'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
  167. ged_mat = np.zeros((len(graphs), len(graphs)))
  168. if parallel:
  169. len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
  170. ged_vec = [0 for i in range(len_itr)]
  171. n_edit_operations = [0 for i in range(len_itr)]
  172. itr = combinations(range(0, len(graphs)), 2)
  173. if n_jobs is None:
  174. n_jobs = multiprocessing.cpu_count()
  175. if len_itr < 100 * n_jobs:
  176. chunksize = int(len_itr / n_jobs) + 1
  177. else:
  178. chunksize = 100
  179. def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
  180. global G_graphs, G_ged_env, G_listID
  181. G_graphs = graphs_toshare
  182. G_ged_env = ged_env_toshare
  183. G_listID = listID_toshare
  184. do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort, repeats)
  185. pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
  186. iterator = get_iters(pool.imap_unordered(do_partial, itr, chunksize), desc='computing GEDs', file=sys.stdout, length=len(graphs), verbose=verbose)
  187. # iterator = pool.imap_unordered(do_partial, itr, chunksize)
  188. for i, j, dis, n_eo_tmp in iterator:
  189. idx_itr = int(len(graphs) * i + j - (i + 1) * (i + 2) / 2)
  190. ged_vec[idx_itr] = dis
  191. ged_mat[i][j] = dis
  192. ged_mat[j][i] = dis
  193. n_edit_operations[idx_itr] = n_eo_tmp
  194. # print('\n-------------------------------------------')
  195. # print(i, j, idx_itr, dis)
  196. pool.close()
  197. pool.join()
  198. else:
  199. ged_vec = []
  200. n_edit_operations = []
  201. iterator = get_iters(range(len(graphs)), desc='computing GEDs', file=sys.stdout, length=len(graphs), verbose=verbose)
  202. for i in iterator:
  203. # for i in range(len(graphs)):
  204. for j in range(i + 1, len(graphs)):
  205. if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
  206. dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j], repeats)
  207. else:
  208. dis, pi_backward, pi_forward = _compute_ged(ged_env, listID[j], listID[i], graphs[j], graphs[i], repeats)
  209. ged_vec.append(dis)
  210. ged_mat[i][j] = dis
  211. ged_mat[j][i] = dis
  212. n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
  213. n_edit_operations.append(n_eo_tmp)
  214. return ged_vec, ged_mat, n_edit_operations
  215. def _wrapper_compute_ged_parallel(options, sort, repeats, itr):
  216. i = itr[0]
  217. j = itr[1]
  218. dis, n_eo_tmp = _compute_ged_parallel(G_ged_env, G_listID[i], G_listID[j], G_graphs[i], G_graphs[j], options, sort, repeats)
  219. return i, j, dis, n_eo_tmp
  220. def _compute_ged_parallel(env, gid1, gid2, g1, g2, options, sort, repeats):
  221. if nx.number_of_nodes(g1) <= nx.number_of_nodes(g2) or not sort:
  222. dis, pi_forward, pi_backward = _compute_ged(env, gid1, gid2, g1, g2, repeats)
  223. else:
  224. dis, pi_backward, pi_forward = _compute_ged(env, gid2, gid1, g2, g1, repeats)
  225. n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, **options) # [0,0,0,0,0,0]
  226. return dis, n_eo_tmp
  227. def _compute_ged(env, gid1, gid2, g1, g2, repeats):
  228. dis_min = np.inf # @todo: maybe compare distance and then do others (faster).
  229. for i in range(0, repeats):
  230. env.run_method(gid1, gid2)
  231. pi_forward = env.get_forward_map(gid1, gid2)
  232. pi_backward = env.get_backward_map(gid1, gid2)
  233. upper = env.get_upper_bound(gid1, gid2)
  234. dis = upper
  235. # make the map label correct (label remove map as np.inf)
  236. nodes1 = [n for n in g1.nodes()]
  237. nodes2 = [n for n in g2.nodes()]
  238. nb1 = nx.number_of_nodes(g1)
  239. nb2 = nx.number_of_nodes(g2)
  240. pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
  241. pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
  242. if dis < dis_min:
  243. dis_min = dis
  244. pi_forward_min = pi_forward
  245. pi_backward_min = pi_backward
  246. return dis_min, pi_forward_min, pi_backward_min
  247. def label_costs_to_matrix(costs, nb_labels):
  248. """Reform a label cost vector to a matrix.
  249. Parameters
  250. ----------
  251. costs : numpy.array
  252. The vector containing costs between labels, in the order of node insertion costs, node deletion costs, node substitition costs, edge insertion costs, edge deletion costs, edge substitition costs.
  253. nb_labels : integer
  254. Number of labels.
  255. Returns
  256. -------
  257. cost_matrix : numpy.array.
  258. The reformed label cost matrix of size (nb_labels, nb_labels). Each row/column of cost_matrix corresponds to a label, and the first label is the dummy label. This is the same setting as in GEDData.
  259. """
  260. # Initialize label cost matrix.
  261. cost_matrix = np.zeros((nb_labels + 1, nb_labels + 1))
  262. i = 0
  263. # Costs of insertions.
  264. for col in range(1, nb_labels + 1):
  265. cost_matrix[0, col] = costs[i]
  266. i += 1
  267. # Costs of deletions.
  268. for row in range(1, nb_labels + 1):
  269. cost_matrix[row, 0] = costs[i]
  270. i += 1
  271. # Costs of substitutions.
  272. for row in range(1, nb_labels + 1):
  273. for col in range(row + 1, nb_labels + 1):
  274. cost_matrix[row, col] = costs[i]
  275. cost_matrix[col, row] = costs[i]
  276. i += 1
  277. return cost_matrix
  278. def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is_cml=False, **kwargs):
  279. if is_cml:
  280. if edit_cost == 'CONSTANT':
  281. node_labels = kwargs.get('node_labels', [])
  282. edge_labels = kwargs.get('edge_labels', [])
  283. return get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
  284. node_labels=node_labels, edge_labels=edge_labels)
  285. else:
  286. raise Exception('Edit cost "', edit_cost, '" is not supported.')
  287. else:
  288. if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
  289. return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)
  290. elif edit_cost == 'NON_SYMBOLIC':
  291. node_attrs = kwargs.get('node_attrs', [])
  292. edge_attrs = kwargs.get('edge_attrs', [])
  293. return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
  294. node_attrs=node_attrs, edge_attrs=edge_attrs)
  295. elif edit_cost == 'CONSTANT':
  296. node_labels = kwargs.get('node_labels', [])
  297. edge_labels = kwargs.get('edge_labels', [])
  298. return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
  299. node_labels=node_labels, edge_labels=edge_labels)
  300. else:
  301. return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
  302. def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
  303. node_labels=[], edge_labels=[]):
  304. """Compute times that edit operations are used in an edit path for symbolic-labeled graphs, where the costs are different for each pair of nodes.
  305. Returns
  306. -------
  307. list
  308. A vector of numbers of times that costs bewteen labels are used in an edit path, formed in the order of node insertion costs, node deletion costs, node substitition costs, edge insertion costs, edge deletion costs, edge substitition costs. The dummy label is the first label, and the self label costs are not included.
  309. """
  310. # Initialize.
  311. nb_ops_node = np.zeros((1 + len(node_labels), 1 + len(node_labels)))
  312. nb_ops_edge = np.zeros((1 + len(edge_labels), 1 + len(edge_labels)))
  313. # For nodes.
  314. nodes1 = [n for n in g1.nodes()]
  315. for i, map_i in enumerate(forward_map):
  316. label1 = tuple(g1.nodes[nodes1[i]].items()) # @todo: order and faster
  317. idx_label1 = node_labels.index(label1) # @todo: faster
  318. if map_i == np.inf: # deletions.
  319. nb_ops_node[idx_label1 + 1, 0] += 1
  320. else: # substitutions.
  321. label2 = tuple(g2.nodes[map_i].items())
  322. if label1 != label2:
  323. idx_label2 = node_labels.index(label2) # @todo: faster
  324. nb_ops_node[idx_label1 + 1, idx_label2 + 1] += 1
  325. # insertions.
  326. nodes2 = [n for n in g2.nodes()]
  327. for i, map_i in enumerate(backward_map):
  328. if map_i == np.inf:
  329. label = tuple(g2.nodes[nodes2[i]].items())
  330. idx_label = node_labels.index(label) # @todo: faster
  331. nb_ops_node[0, idx_label + 1] += 1
  332. # For edges.
  333. edges1 = [e for e in g1.edges()]
  334. edges2_marked = []
  335. for nf1, nt1 in edges1:
  336. label1 = tuple(g1.edges[(nf1, nt1)].items())
  337. idx_label1 = edge_labels.index(label1) # @todo: faster
  338. idxf1 = nodes1.index(nf1) # @todo: faster
  339. idxt1 = nodes1.index(nt1) # @todo: faster
  340. # At least one of the nodes is removed, thus the edge is removed.
  341. if forward_map[idxf1] == np.inf or forward_map[idxt1] == np.inf:
  342. nb_ops_edge[idx_label1 + 1, 0] += 1
  343. # corresponding edge is in g2.
  344. else:
  345. nf2, nt2 = forward_map[idxf1], forward_map[idxt1]
  346. if (nf2, nt2) in g2.edges():
  347. edges2_marked.append((nf2, nt2))
  348. # If edge labels are different.
  349. label2 = tuple(g2.edges[(nf2, nt2)].items())
  350. if label1 != label2:
  351. idx_label2 = edge_labels.index(label2) # @todo: faster
  352. nb_ops_edge[idx_label1 + 1, idx_label2 + 1] += 1
  353. # Switch nf2 and nt2, for directed graphs.
  354. elif (nt2, nf2) in g2.edges():
  355. edges2_marked.append((nt2, nf2))
  356. # If edge labels are different.
  357. label2 = tuple(g2.edges[(nt2, nf2)].items())
  358. if label1 != label2:
  359. idx_label2 = edge_labels.index(label2) # @todo: faster
  360. nb_ops_edge[idx_label1 + 1, idx_label2 + 1] += 1
  361. # Corresponding nodes are in g2, however the edge is removed.
  362. else:
  363. nb_ops_edge[idx_label1 + 1, 0] += 1
  364. # insertions.
  365. for nt, nf in g2.edges():
  366. if (nt, nf) not in edges2_marked and (nf, nt) not in edges2_marked: # @todo: for directed.
  367. label = tuple(g2.edges[(nt, nf)].items())
  368. idx_label = edge_labels.index(label) # @todo: faster
  369. nb_ops_edge[0, idx_label + 1] += 1
  370. # Reform the numbers of edit oeprations into a vector.
  371. nb_eo_vector = []
  372. # node insertion.
  373. for i in range(1, len(nb_ops_node)):
  374. nb_eo_vector.append(nb_ops_node[0, i])
  375. # node deletion.
  376. for i in range(1, len(nb_ops_node)):
  377. nb_eo_vector.append(nb_ops_node[i, 0])
  378. # node substitution.
  379. for i in range(1, len(nb_ops_node)):
  380. for j in range(i + 1, len(nb_ops_node)):
  381. nb_eo_vector.append(nb_ops_node[i, j])
  382. # edge insertion.
  383. for i in range(1, len(nb_ops_edge)):
  384. nb_eo_vector.append(nb_ops_edge[0, i])
  385. # edge deletion.
  386. for i in range(1, len(nb_ops_edge)):
  387. nb_eo_vector.append(nb_ops_edge[i, 0])
  388. # edge substitution.
  389. for i in range(1, len(nb_ops_edge)):
  390. for j in range(i + 1, len(nb_ops_edge)):
  391. nb_eo_vector.append(nb_ops_edge[i, j])
  392. return nb_eo_vector
  393. def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
  394. node_labels=[], edge_labels=[]):
  395. """Compute the number of each edit operations for symbolic-labeled graphs.
  396. """
  397. n_vi = 0
  398. n_vr = 0
  399. n_vs = 0
  400. n_ei = 0
  401. n_er = 0
  402. n_es = 0
  403. nodes1 = [n for n in g1.nodes()]
  404. for i, map_i in enumerate(forward_map):
  405. if map_i == np.inf:
  406. n_vr += 1
  407. else:
  408. for nl in node_labels:
  409. label1 = g1.nodes[nodes1[i]][nl]
  410. label2 = g2.nodes[map_i][nl]
  411. if label1 != label2:
  412. n_vs += 1
  413. break
  414. for map_i in backward_map:
  415. if map_i == np.inf:
  416. n_vi += 1
  417. # idx_nodes1 = range(0, len(node1))
  418. edges1 = [e for e in g1.edges()]
  419. nb_edges2_cnted = 0
  420. for n1, n2 in edges1:
  421. idx1 = nodes1.index(n1)
  422. idx2 = nodes1.index(n2)
  423. # one of the nodes is removed, thus the edge is removed.
  424. if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
  425. n_er += 1
  426. # corresponding edge is in g2.
  427. elif (forward_map[idx1], forward_map[idx2]) in g2.edges():
  428. nb_edges2_cnted += 1
  429. # edge labels are different.
  430. for el in edge_labels:
  431. label1 = g2.edges[((forward_map[idx1], forward_map[idx2]))][el]
  432. label2 = g1.edges[(n1, n2)][el]
  433. if label1 != label2:
  434. n_es += 1
  435. break
  436. elif (forward_map[idx2], forward_map[idx1]) in g2.edges():
  437. nb_edges2_cnted += 1
  438. # edge labels are different.
  439. for el in edge_labels:
  440. label1 = g2.edges[((forward_map[idx2], forward_map[idx1]))][el]
  441. label2 = g1.edges[(n1, n2)][el]
  442. if label1 != label2:
  443. n_es += 1
  444. break
  445. # corresponding nodes are in g2, however the edge is removed.
  446. else:
  447. n_er += 1
  448. n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
  449. return n_vi, n_vr, n_vs, n_ei, n_er, n_es
  450. def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
  451. """Compute the number of each edit operations.
  452. """
  453. n_vi = 0
  454. n_vr = 0
  455. n_vs = 0
  456. sod_vs = 0
  457. n_ei = 0
  458. n_er = 0
  459. nodes1 = [n for n in g1.nodes()]
  460. for i, map_i in enumerate(forward_map):
  461. if map_i == np.inf:
  462. n_vr += 1
  463. else:
  464. n_vs += 1
  465. diff_x = float(g1.nodes[nodes1[i]]['x']) - float(g2.nodes[map_i]['x'])
  466. diff_y = float(g1.nodes[nodes1[i]]['y']) - float(g2.nodes[map_i]['y'])
  467. sod_vs += np.sqrt(np.square(diff_x) + np.square(diff_y))
  468. for map_i in backward_map:
  469. if map_i == np.inf:
  470. n_vi += 1
  471. # idx_nodes1 = range(0, len(node1))
  472. edges1 = [e for e in g1.edges()]
  473. nb_edges2_cnted = 0
  474. for n1, n2 in edges1:
  475. idx1 = nodes1.index(n1)
  476. idx2 = nodes1.index(n2)
  477. # one of the nodes is removed, thus the edge is removed.
  478. if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
  479. n_er += 1
  480. # corresponding edge is in g2. Edge label is not considered.
  481. elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \
  482. (forward_map[idx2], forward_map[idx1]) in g2.edges():
  483. nb_edges2_cnted += 1
  484. # corresponding nodes are in g2, however the edge is removed.
  485. else:
  486. n_er += 1
  487. n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
  488. return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er
  489. def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
  490. node_attrs=[], edge_attrs=[]):
  491. """Compute the number of each edit operations.
  492. """
  493. n_vi = 0
  494. n_vr = 0
  495. n_vs = 0
  496. sod_vs = 0
  497. n_ei = 0
  498. n_er = 0
  499. n_es = 0
  500. sod_es = 0
  501. nodes1 = [n for n in g1.nodes()]
  502. for i, map_i in enumerate(forward_map):
  503. if map_i == np.inf:
  504. n_vr += 1
  505. else:
  506. n_vs += 1
  507. sum_squares = 0
  508. for a_name in node_attrs:
  509. diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name])
  510. sum_squares += np.square(diff)
  511. sod_vs += np.sqrt(sum_squares)
  512. for map_i in backward_map:
  513. if map_i == np.inf:
  514. n_vi += 1
  515. # idx_nodes1 = range(0, len(node1))
  516. edges1 = [e for e in g1.edges()]
  517. for n1, n2 in edges1:
  518. idx1 = nodes1.index(n1)
  519. idx2 = nodes1.index(n2)
  520. n1_g2 = forward_map[idx1]
  521. n2_g2 = forward_map[idx2]
  522. # one of the nodes is removed, thus the edge is removed.
  523. if n1_g2 == np.inf or n2_g2 == np.inf:
  524. n_er += 1
  525. # corresponding edge is in g2.
  526. elif (n1_g2, n2_g2) in g2.edges():
  527. n_es += 1
  528. sum_squares = 0
  529. for a_name in edge_attrs:
  530. diff = float(g1.edges[n1, n2][a_name]) - float(g2.edges[n1_g2, n2_g2][a_name])
  531. sum_squares += np.square(diff)
  532. sod_es += np.sqrt(sum_squares)
  533. elif (n2_g2, n1_g2) in g2.edges():
  534. n_es += 1
  535. sum_squares = 0
  536. for a_name in edge_attrs:
  537. diff = float(g1.edges[n2, n1][a_name]) - float(g2.edges[n2_g2, n1_g2][a_name])
  538. sum_squares += np.square(diff)
  539. sod_es += np.sqrt(sum_squares)
  540. # corresponding nodes are in g2, however the edge is removed.
  541. else:
  542. n_er += 1
  543. n_ei = nx.number_of_edges(g2) - n_es
  544. return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es
  545. def ged_options_to_string(options):
  546. opt_str = ' '
  547. for key, val in options.items():
  548. if key == 'initialization_method':
  549. opt_str += '--initialization-method ' + str(val) + ' '
  550. elif key == 'initialization_options':
  551. opt_str += '--initialization-options ' + str(val) + ' '
  552. elif key == 'lower_bound_method':
  553. opt_str += '--lower-bound-method ' + str(val) + ' '
  554. elif key == 'random_substitution_ratio':
  555. opt_str += '--random-substitution-ratio ' + str(val) + ' '
  556. elif key == 'initial_solutions':
  557. opt_str += '--initial-solutions ' + str(val) + ' '
  558. elif key == 'ratio_runs_from_initial_solutions':
  559. opt_str += '--ratio-runs-from-initial-solutions ' + str(val) + ' '
  560. elif key == 'threads':
  561. opt_str += '--threads ' + str(val) + ' '
  562. elif key == 'num_randpost_loops':
  563. opt_str += '--num-randpost-loops ' + str(val) + ' '
  564. elif key == 'max_randpost_retrials':
  565. opt_str += '--maxrandpost-retrials ' + str(val) + ' '
  566. elif key == 'randpost_penalty':
  567. opt_str += '--randpost-penalty ' + str(val) + ' '
  568. elif key == 'randpost_decay':
  569. opt_str += '--randpost-decay ' + str(val) + ' '
  570. elif key == 'log':
  571. opt_str += '--log ' + str(val) + ' '
  572. elif key == 'randomness':
  573. opt_str += '--randomness ' + str(val) + ' '
  574. # if not isinstance(val, list):
  575. # opt_str += '--' + key.replace('_', '-') + ' '
  576. # if val == False:
  577. # val_str = 'FALSE'
  578. # else:
  579. # val_str = str(val)
  580. # opt_str += val_str + ' '
  581. return opt_str

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.