You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 6.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. import networkx as nx
  2. import numpy as np
  3. # from tqdm import tqdm
  4. def getSPLengths(G1):
  5. sp = nx.shortest_path(G1)
  6. distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes()))
  7. for i in sp.keys():
  8. for j in sp[i].keys():
  9. distances[i, j] = len(sp[i][j]) - 1
  10. return distances
  11. def getSPGraph(G, edge_weight=None):
  12. """Transform graph G to its corresponding shortest-paths graph.
  13. Parameters
  14. ----------
  15. G : NetworkX graph
  16. The graph to be tramsformed.
  17. edge_weight : string
  18. edge attribute corresponding to the edge weight.
  19. Return
  20. ------
  21. S : NetworkX graph
  22. The shortest-paths graph corresponding to G.
  23. Notes
  24. ------
  25. For an input graph G, its corresponding shortest-paths graph S contains the same set of nodes as G, while there exists an edge between all nodes in S which are connected by a walk in G. Every edge in S between two nodes is labeled by the shortest distance between these two nodes.
  26. References
  27. ----------
  28. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
  29. """
  30. return floydTransformation(G, edge_weight=edge_weight)
  31. def floydTransformation(G, edge_weight=None):
  32. """Transform graph G to its corresponding shortest-paths graph using Floyd-transformation.
  33. Parameters
  34. ----------
  35. G : NetworkX graph
  36. The graph to be tramsformed.
  37. edge_weight : string
  38. edge attribute corresponding to the edge weight. The default edge weight is bond_type.
  39. Return
  40. ------
  41. S : NetworkX graph
  42. The shortest-paths graph corresponding to G.
  43. References
  44. ----------
  45. [1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
  46. """
  47. spMatrix = nx.floyd_warshall_numpy(G, weight=edge_weight)
  48. S = nx.Graph()
  49. S.add_nodes_from(G.nodes(data=True))
  50. ns = list(G.nodes())
  51. for i in range(0, G.number_of_nodes()):
  52. for j in range(i + 1, G.number_of_nodes()):
  53. if spMatrix[i, j] != np.inf:
  54. S.add_edge(ns[i], ns[j], cost=spMatrix[i, j])
  55. return S
  56. def untotterTransformation(G, node_label, edge_label):
  57. """Transform graph G according to Mahé et al.'s work to filter out tottering patterns of marginalized kernel and tree pattern kernel.
  58. Parameters
  59. ----------
  60. G : NetworkX graph
  61. The graph to be tramsformed.
  62. node_label : string
  63. node attribute used as label. The default node label is 'atom'.
  64. edge_label : string
  65. edge attribute used as label. The default edge label is 'bond_type'.
  66. Return
  67. ------
  68. gt : NetworkX graph
  69. The transformed graph corresponding to G.
  70. References
  71. ----------
  72. [1] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
  73. """
  74. # arrange all graphs in a list
  75. G = G.to_directed()
  76. gt = nx.Graph()
  77. gt.graph = G.graph
  78. gt.add_nodes_from(G.nodes(data=True))
  79. for edge in G.edges():
  80. gt.add_node(edge)
  81. gt.node[edge].update({node_label: G.node[edge[1]][node_label]})
  82. gt.add_edge(edge[0], edge)
  83. gt.edges[edge[0], edge].update({
  84. edge_label:
  85. G[edge[0]][edge[1]][edge_label]
  86. })
  87. for neighbor in G[edge[1]]:
  88. if neighbor != edge[0]:
  89. gt.add_edge(edge, (edge[1], neighbor))
  90. gt.edges[edge, (edge[1], neighbor)].update({
  91. edge_label:
  92. G[edge[1]][neighbor][edge_label]
  93. })
  94. # nx.draw_networkx(gt)
  95. # plt.show()
  96. # relabel nodes using consecutive integers for convenience of kernel calculation.
  97. gt = nx.convert_node_labels_to_integers(
  98. gt, first_label=0, label_attribute='label_orignal')
  99. return gt
  100. def direct_product(G1, G2, node_label, edge_label):
  101. """Return the direct/tensor product of directed graphs G1 and G2.
  102. Parameters
  103. ----------
  104. G1, G2 : NetworkX graph
  105. The original graphs.
  106. node_label : string
  107. node attribute used as label. The default node label is 'atom'.
  108. edge_label : string
  109. edge attribute used as label. The default edge label is 'bond_type'.
  110. Return
  111. ------
  112. gt : NetworkX graph
  113. The direct product graph of G1 and G2.
  114. Notes
  115. -----
  116. This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph.
  117. References
  118. ----------
  119. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
  120. """
  121. # arrange all graphs in a list
  122. from itertools import product
  123. # G = G.to_directed()
  124. gt = nx.DiGraph()
  125. # add nodes
  126. for u, v in product(G1, G2):
  127. if G1.nodes[u][node_label] == G2.nodes[v][node_label]:
  128. gt.add_node((u, v))
  129. gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]})
  130. # add edges, faster for sparse graphs (no so many edges), which is the most case for now.
  131. for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
  132. if (u1, u2) in gt and (
  133. v1, v2
  134. ) in gt and G1.edges[u1, v1][edge_label] == G2.edges[u2,
  135. v2][edge_label]:
  136. gt.add_edge((u1, u2), (v1, v2))
  137. gt.edges[(u1, u2), (v1, v2)].update({
  138. edge_label:
  139. G1.edges[u1, v1][edge_label]
  140. })
  141. # # add edges, faster for dense graphs (a lot of edges, complete graph would be super).
  142. # for u, v in product(gt, gt):
  143. # if (u[0], v[0]) in G1.edges and (
  144. # u[1], v[1]
  145. # ) in G2.edges and G1.edges[u[0],
  146. # v[0]][edge_label] == G2.edges[u[1],
  147. # v[1]][edge_label]:
  148. # gt.add_edge((u[0], u[1]), (v[0], v[1]))
  149. # gt.edges[(u[0], u[1]), (v[0], v[1])].update({
  150. # edge_label:
  151. # G1.edges[u[0], v[0]][edge_label]
  152. # })
  153. # relabel nodes using consecutive integers for convenience of kernel calculation.
  154. # gt = nx.convert_node_labels_to_integers(
  155. # gt, first_label=0, label_attribute='label_orignal')
  156. return gt

A Python package for graph kernels, graph edit distances and graph pre-image problem.