You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 8.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. import networkx as nx
  2. import numpy as np
  3. from copy import deepcopy
  4. #from itertools import product
  5. # from tqdm import tqdm
  6. def getSPLengths(G1):
  7. sp = nx.shortest_path(G1)
  8. distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
  9. for i in sp.keys():
  10. for j in sp[i].keys():
  11. distances[i, j] = len(sp[i][j]) - 1
  12. return distances
  13. def getSPGraph(G, edge_weight=None):
  14. """Transform graph G to its corresponding shortest-paths graph.
  15. Parameters
  16. ----------
  17. G : NetworkX graph
  18. The graph to be tramsformed.
  19. edge_weight : string
  20. edge attribute corresponding to the edge weight.
  21. Return
  22. ------
  23. S : NetworkX graph
  24. The shortest-paths graph corresponding to G.
  25. Notes
  26. ------
  27. For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.
  28. References
  29. ----------
  30. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
  31. """
  32. return floydTransformation(G, edge_weight=edge_weight)
  33. def floydTransformation(G, edge_weight=None):
  34. """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.
  35. Parameters
  36. ----------
  37. G : NetworkX graph
  38. The graph to be tramsformed.
  39. edge_weight : string
  40. edge attribute corresponding to the edge weight. The default edge weight is bond_type.
  41. Return
  42. ------
  43. S : NetworkX graph
  44. The shortest-paths graph corresponding to G.
  45. References
  46. ----------
  47. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
  48. """
  49. spMatrix = nx.floyd_warshall_numpy(G, weight=edge_weight)
  50. S = nx.Graph()
  51. S.add_nodes_from(G.nodes(data=True))
  52. ns = list(G.nodes())
  53. for i in range(0, G.number_of_nodes()):
  54. for j in range(i + 1, G.number_of_nodes()):
  55. if spMatrix[i, j] != np.inf:
  56. S.add_edge(ns[i], ns[j], cost=spMatrix[i, j])
  57. return S
  58. def untotterTransformation(G, node_label, edge_label):
  59. """Transform graph G according to Mahé et al.'s work to filter out tottering patterns of marginalized kernel and tree pattern kernel.
  60. Parameters
  61. ----------
  62. G : NetworkX graph
  63. The graph to be tramsformed.
  64. node_label : string
  65. node attribute used as label. The default node label is 'atom'.
  66. edge_label : string
  67. edge attribute used as label. The default edge label is 'bond_type'.
  68. Return
  69. ------
  70. gt : NetworkX graph
  71. The transformed graph corresponding to G.
  72. References
  73. ----------
  74. [1] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
  75. """
  76. # arrange all graphs in a list
  77. G = G.to_directed()
  78. gt = nx.Graph()
  79. gt.graph = G.graph
  80. gt.add_nodes_from(G.nodes(data=True))
  81. for edge in G.edges():
  82. gt.add_node(edge)
  83. gt.node[edge].update({node_label: G.node[edge[1]][node_label]})
  84. gt.add_edge(edge[0], edge)
  85. gt.edges[edge[0], edge].update({
  86. edge_label:
  87. G[edge[0]][edge[1]][edge_label]
  88. })
  89. for neighbor in G[edge[1]]:
  90. if neighbor != edge[0]:
  91. gt.add_edge(edge, (edge[1], neighbor))
  92. gt.edges[edge, (edge[1], neighbor)].update({
  93. edge_label:
  94. G[edge[1]][neighbor][edge_label]
  95. })
  96. # nx.draw_networkx(gt)
  97. # plt.show()
  98. # relabel nodes using consecutive integers for convenience of kernel calculation.
  99. gt = nx.convert_node_labels_to_integers(
  100. gt, first_label=0, label_attribute='label_orignal')
  101. return gt
  102. def direct_product(G1, G2, node_label, edge_label):
  103. """Return the direct/tensor product of directed graphs G1 and G2.
  104. Parameters
  105. ----------
  106. G1, G2 : NetworkX graph
  107. The original graphs.
  108. node_label : string
  109. node attribute used as label. The default node label is 'atom'.
  110. edge_label : string
  111. edge attribute used as label. The default edge label is 'bond_type'.
  112. Return
  113. ------
  114. gt : NetworkX graph
  115. The direct product graph of G1 and G2.
  116. Notes
  117. -----
  118. This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph.
  119. References
  120. ----------
  121. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
  122. """
  123. # arrange all graphs in a list
  124. from itertools import product
  125. # G = G.to_directed()
  126. gt = nx.DiGraph()
  127. # add nodes
  128. for u, v in product(G1, G2):
  129. if G1.nodes[u][node_label] == G2.nodes[v][node_label]:
  130. gt.add_node((u, v))
  131. gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]})
  132. # add edges, faster for sparse graphs (no so many edges), which is the most case for now.
  133. for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
  134. if (u1, u2) in gt and (
  135. v1, v2
  136. ) in gt and G1.edges[u1, v1][edge_label] == G2.edges[u2,
  137. v2][edge_label]:
  138. gt.add_edge((u1, u2), (v1, v2))
  139. gt.edges[(u1, u2), (v1, v2)].update({
  140. edge_label:
  141. G1.edges[u1, v1][edge_label]
  142. })
  143. # # add edges, faster for dense graphs (a lot of edges, complete graph would be super).
  144. # for u, v in product(gt, gt):
  145. # if (u[0], v[0]) in G1.edges and (
  146. # u[1], v[1]
  147. # ) in G2.edges and G1.edges[u[0],
  148. # v[0]][edge_label] == G2.edges[u[1],
  149. # v[1]][edge_label]:
  150. # gt.add_edge((u[0], u[1]), (v[0], v[1]))
  151. # gt.edges[(u[0], u[1]), (v[0], v[1])].update({
  152. # edge_label:
  153. # G1.edges[u[0], v[0]][edge_label]
  154. # })
  155. # relabel nodes using consecutive integers for convenience of kernel calculation.
  156. # gt = nx.convert_node_labels_to_integers(
  157. # gt, first_label=0, label_attribute='label_orignal')
  158. return gt
  159. def graph_deepcopy(G):
  160. """Deep copy a graph, including deep copy of all nodes, edges and
  161. attributes of the graph, nodes and edges.
  162. Note
  163. ----
  164. It is the same as the NetworkX function graph.copy(), as far as I know.
  165. """
  166. # add graph attributes.
  167. labels = {}
  168. for k, v in G.graph.items():
  169. labels[k] = deepcopy(v)
  170. if G.is_directed():
  171. G_copy = nx.DiGraph(**labels)
  172. else:
  173. G_copy = nx.Graph(**labels)
  174. # add nodes
  175. for nd, attrs in G.nodes(data=True):
  176. labels = {}
  177. for k, v in attrs.items():
  178. labels[k] = deepcopy(v)
  179. G_copy.add_node(nd, **labels)
  180. # add edges.
  181. for nd1, nd2, attrs in G.edges(data=True):
  182. labels = {}
  183. for k, v in attrs.items():
  184. labels[k] = deepcopy(v)
  185. G_copy.add_edge(nd1, nd2, **labels)
  186. return G_copy
  187. def graph_isIdentical(G1, G2):
  188. """Check if two graphs are identical, including: same nodes, edges, node
  189. labels/attributes, edge labels/attributes.
  190. Notes
  191. ----
  192. 1. The type of graphs has to be the same.
  193. 2. Global/Graph attributes are neglected as they may contain names for graphs.
  194. """
  195. # check nodes.
  196. nlist1 = [n for n in G1.nodes(data=True)]
  197. nlist2 = [n for n in G2.nodes(data=True)]
  198. if not nlist1 == nlist2:
  199. return False
  200. # check edges.
  201. elist1 = [n for n in G1.edges(data=True)]
  202. elist2 = [n for n in G2.edges(data=True)]
  203. if not elist1 == elist2:
  204. return False
  205. # check graph attributes.
  206. return True
  207. def get_node_labels(Gn, node_label):
  208. """Get node labels of dataset Gn.
  209. """
  210. nl = set()
  211. for G in Gn:
  212. nl = nl | set(nx.get_node_attributes(G, node_label).values())
  213. return nl
  214. def get_edge_labels(Gn, edge_label):
  215. """Get edge labels of dataset Gn.
  216. """
  217. el = set()
  218. for G in Gn:
  219. el = el | set(nx.get_edge_attributes(G, edge_label).values())
  220. return el

A Python package for graph kernels, graph edit distances and graph pre-image problem.