You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

ged.py 7.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Oct 17 18:44:59 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import networkx as nx
  9. from tqdm import tqdm
  10. import sys
  11. from gedlibpy import librariesImport, gedlibpy
  12. def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP',
  13. edit_cost_constant=[], saveGXL='benoit', stabilizer='min', repeat=50):
  14. """
  15. Compute GED for 2 graphs.
  16. """
  17. if lib == 'gedlibpy':
  18. def convertGraph(G):
  19. """Convert a graph to the proper NetworkX format that can be
  20. recognized by library gedlibpy.
  21. """
  22. G_new = nx.Graph()
  23. for nd, attrs in G.nodes(data=True):
  24. G_new.add_node(str(nd), chem=attrs['atom'])
  25. for nd1, nd2, attrs in G.edges(data=True):
  26. # G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
  27. G_new.add_edge(str(nd1), str(nd2))
  28. return G_new
  29. gedlibpy.restart_env()
  30. gedlibpy.add_nx_graph(convertGraph(g1), "")
  31. gedlibpy.add_nx_graph(convertGraph(g2), "")
  32. listID = gedlibpy.get_all_graph_ids()
  33. gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
  34. gedlibpy.init()
  35. gedlibpy.set_method(method, "")
  36. gedlibpy.init_method()
  37. g = listID[0]
  38. h = listID[1]
  39. if stabilizer == None:
  40. gedlibpy.run_method(g, h)
  41. pi_forward = gedlibpy.get_forward_map(g, h)
  42. pi_backward = gedlibpy.get_backward_map(g, h)
  43. upper = gedlibpy.get_upper_bound(g, h)
  44. lower = gedlibpy.get_lower_bound(g, h)
  45. elif stabilizer == 'min':
  46. upper = np.inf
  47. for itr in range(repeat):
  48. gedlibpy.run_method(g, h)
  49. upper_tmp = gedlibpy.get_upper_bound(g, h)
  50. if upper_tmp < upper:
  51. upper = upper_tmp
  52. pi_forward = gedlibpy.get_forward_map(g, h)
  53. pi_backward = gedlibpy.get_backward_map(g, h)
  54. lower = gedlibpy.get_lower_bound(g, h)
  55. if upper == 0:
  56. break
  57. dis = upper
  58. # make the map label correct (label remove map as np.inf)
  59. nodes1 = [n for n in g1.nodes()]
  60. nodes2 = [n for n in g2.nodes()]
  61. nb1 = nx.number_of_nodes(g1)
  62. nb2 = nx.number_of_nodes(g2)
  63. pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
  64. pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
  65. return dis, pi_forward, pi_backward
  66. def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP',
  67. edit_cost_constant=[], stabilizer='min', repeat=50):
  68. """
  69. Compute GEDs for a group of graphs.
  70. """
  71. if lib == 'gedlibpy':
  72. def convertGraph(G):
  73. """Convert a graph to the proper NetworkX format that can be
  74. recognized by library gedlibpy.
  75. """
  76. G_new = nx.Graph()
  77. for nd, attrs in G.nodes(data=True):
  78. G_new.add_node(str(nd), chem=attrs['atom'])
  79. for nd1, nd2, attrs in G.edges(data=True):
  80. # G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
  81. G_new.add_edge(str(nd1), str(nd2))
  82. return G_new
  83. gedlibpy.restart_env()
  84. gedlibpy.add_nx_graph(convertGraph(g1), "")
  85. gedlibpy.add_nx_graph(convertGraph(g2), "")
  86. listID = gedlibpy.get_all_graph_ids()
  87. gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
  88. gedlibpy.init()
  89. gedlibpy.set_method(method, "")
  90. gedlibpy.init_method()
  91. g = listID[0]
  92. h = listID[1]
  93. if stabilizer == None:
  94. gedlibpy.run_method(g, h)
  95. pi_forward = gedlibpy.get_forward_map(g, h)
  96. pi_backward = gedlibpy.get_backward_map(g, h)
  97. upper = gedlibpy.get_upper_bound(g, h)
  98. lower = gedlibpy.get_lower_bound(g, h)
  99. elif stabilizer == 'min':
  100. upper = np.inf
  101. for itr in range(repeat):
  102. gedlibpy.run_method(g, h)
  103. upper_tmp = gedlibpy.get_upper_bound(g, h)
  104. if upper_tmp < upper:
  105. upper = upper_tmp
  106. pi_forward = gedlibpy.get_forward_map(g, h)
  107. pi_backward = gedlibpy.get_backward_map(g, h)
  108. lower = gedlibpy.get_lower_bound(g, h)
  109. if upper == 0:
  110. break
  111. dis = upper
  112. # make the map label correct (label remove map as np.inf)
  113. nodes1 = [n for n in g1.nodes()]
  114. nodes2 = [n for n in g2.nodes()]
  115. nb1 = nx.number_of_nodes(g1)
  116. nb2 = nx.number_of_nodes(g2)
  117. pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
  118. pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
  119. return dis, pi_forward, pi_backward
  120. def ged_median(Gn, Gn_median, measure='ged', verbose=False,
  121. ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
  122. dis_list = []
  123. pi_forward_list = []
  124. for idx, G in tqdm(enumerate(Gn), desc='computing median distances',
  125. file=sys.stdout) if verbose else enumerate(Gn):
  126. dis_sum = 0
  127. pi_forward_list.append([])
  128. for G_p in Gn_median:
  129. dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p,
  130. cost=ged_cost, method=ged_method, saveGXL=saveGXL)
  131. pi_forward_list[idx].append(pi_tmp_forward)
  132. dis_sum += dis_tmp
  133. dis_list.append(dis_sum)
  134. return dis_list, pi_forward_list
  135. def get_nb_edit_operations(g1, g2, forward_map, backward_map):
  136. """Compute the number of each edit operations.
  137. """
  138. n_vi = 0
  139. n_vr = 0
  140. n_vs = 0
  141. n_ei = 0
  142. n_er = 0
  143. n_es = 0
  144. nodes1 = [n for n in g1.nodes()]
  145. for i, map_i in enumerate(forward_map):
  146. if map_i == np.inf:
  147. n_vr += 1
  148. elif g1.node[nodes1[i]]['atom'] != g2.node[map_i]['atom']:
  149. n_vs += 1
  150. for map_i in backward_map:
  151. if map_i == np.inf:
  152. n_vi += 1
  153. # idx_nodes1 = range(0, len(node1))
  154. edges1 = [e for e in g1.edges()]
  155. nb_edges2_cnted = 0
  156. for n1, n2 in edges1:
  157. idx1 = nodes1.index(n1)
  158. idx2 = nodes1.index(n2)
  159. # one of the nodes is removed, thus the edge is removed.
  160. if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
  161. n_er += 1
  162. # corresponding edge is in g2. Edge label is not considered.
  163. elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \
  164. (forward_map[idx2], forward_map[idx1]) in g2.edges():
  165. nb_edges2_cnted += 1
  166. # corresponding nodes are in g2, however the edge is removed.
  167. else:
  168. n_er += 1
  169. n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
  170. return n_vi, n_vr, n_vs, n_ei, n_er, n_es

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.