You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

pathKernel.py 8.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. """
  2. @author: linlin
  3. @references: Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. In ESANN 2007 Apr 25 (pp. 355-360).
  4. """
  5. import sys
  6. import pathlib
  7. sys.path.insert(0, "../")
  8. import time
  9. import itertools
  10. from tqdm import tqdm
  11. import networkx as nx
  12. import numpy as np
  13. from gklearn.kernels.deltaKernel import deltakernel
  14. from gklearn.utils.graphdataset import get_dataset_attributes
  15. def pathkernel(*args, node_label='atom', edge_label='bond_type'):
  16. """Calculate mean average path kernels between graphs.
  17. Parameters
  18. ----------
  19. Gn : List of NetworkX graph
  20. List of graphs between which the kernels are calculated.
  21. /
  22. G1, G2 : NetworkX graphs
  23. 2 graphs between which the kernel is calculated.
  24. node_label : string
  25. node attribute used as label. The default node label is atom.
  26. edge_label : string
  27. edge attribute used as label. The default edge label is bond_type.
  28. Return
  29. ------
  30. Kmatrix/kernel : Numpy matrix/float
  31. Kernel matrix, each element of which is the path kernel between 2 praphs. / Path kernel between 2 graphs.
  32. """
  33. Gn = args[0] if len(args) == 1 else [args[0], args[1]]
  34. Kmatrix = np.zeros((len(Gn), len(Gn)))
  35. ds_attrs = get_dataset_attributes(
  36. Gn,
  37. attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
  38. node_label=node_label,
  39. edge_label=edge_label)
  40. try:
  41. some_weight = list(nx.get_edge_attributes(Gn[0],
  42. edge_label).values())[0]
  43. weight = edge_label if isinstance(some_weight, float) or isinstance(
  44. some_weight, int) else None
  45. except:
  46. weight = None
  47. start_time = time.time()
  48. splist = [
  49. get_shortest_paths(Gn[i], weight) for i in tqdm(
  50. range(0, len(Gn)), desc='getting shortest paths', file=sys.stdout)
  51. ]
  52. pbar = tqdm(
  53. total=((len(Gn) + 1) * len(Gn) / 2),
  54. desc='calculating kernels',
  55. file=sys.stdout)
  56. if ds_attrs['node_labeled']:
  57. if ds_attrs['edge_labeled']:
  58. for i in range(0, len(Gn)):
  59. for j in range(i, len(Gn)):
  60. Kmatrix[i][j] = _pathkernel_do_l(Gn[i], Gn[j], splist[i],
  61. splist[j], node_label,
  62. edge_label)
  63. Kmatrix[j][i] = Kmatrix[i][j]
  64. pbar.update(1)
  65. else:
  66. for i in range(0, len(Gn)):
  67. for j in range(i, len(Gn)):
  68. Kmatrix[i][j] = _pathkernel_do_nl(Gn[i], Gn[j], splist[i],
  69. splist[j], node_label)
  70. Kmatrix[j][i] = Kmatrix[i][j]
  71. pbar.update(1)
  72. else:
  73. if ds_attrs['edge_labeled']:
  74. for i in range(0, len(Gn)):
  75. for j in range(i, len(Gn)):
  76. Kmatrix[i][j] = _pathkernel_do_el(Gn[i], Gn[j], splist[i],
  77. splist[j], edge_label)
  78. Kmatrix[j][i] = Kmatrix[i][j]
  79. pbar.update(1)
  80. else:
  81. for i in range(0, len(Gn)):
  82. for j in range(i, len(Gn)):
  83. Kmatrix[i][j] = _pathkernel_do_unl(Gn[i], Gn[j], splist[i],
  84. splist[j])
  85. Kmatrix[j][i] = Kmatrix[i][j]
  86. pbar.update(1)
  87. run_time = time.time() - start_time
  88. print(
  89. "\n --- mean average path kernel matrix of size %d built in %s seconds ---"
  90. % (len(Gn), run_time))
  91. return Kmatrix, run_time
  92. def _pathkernel_do_l(G1, G2, sp1, sp2, node_label, edge_label):
  93. """Calculate mean average path kernel between 2 fully-labeled graphs.
  94. Parameters
  95. ----------
  96. G1, G2 : NetworkX graphs
  97. 2 graphs between which the kernel is calculated.
  98. sp1, sp2 : list of list
  99. List of shortest paths of 2 graphs, where each path is represented by a list of nodes.
  100. node_label : string
  101. node attribute used as label. The default node label is atom.
  102. edge_label : string
  103. edge attribute used as label. The default edge label is bond_type.
  104. Return
  105. ------
  106. kernel : float
  107. Path Kernel between 2 graphs.
  108. """
  109. # calculate kernel
  110. kernel = 0
  111. # if len(sp1) == 0 or len(sp2) == 0:
  112. # return 0 # @todo: should it be zero?
  113. for path1 in sp1:
  114. for path2 in sp2:
  115. if len(path1) == len(path2):
  116. kernel_path = (G1.node[path1[0]][node_label] == G2.node[path2[
  117. 0]][node_label])
  118. if kernel_path:
  119. for i in range(1, len(path1)):
  120. # kernel = 1 if all corresponding nodes and edges in the 2 paths have same labels, otherwise 0
  121. if G1[path1[i - 1]][path1[i]][edge_label] != G2[path2[i - 1]][path2[i]][edge_label] or G1.node[path1[i]][node_label] != G2.node[path2[i]][node_label]:
  122. kernel_path = 0
  123. break
  124. kernel += kernel_path # add up kernels of all paths
  125. kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average
  126. return kernel
  127. def _pathkernel_do_nl(G1, G2, sp1, sp2, node_label):
  128. """Calculate mean average path kernel between 2 node-labeled graphs.
  129. """
  130. # calculate kernel
  131. kernel = 0
  132. # if len(sp1) == 0 or len(sp2) == 0:
  133. # return 0 # @todo: should it be zero?
  134. for path1 in sp1:
  135. for path2 in sp2:
  136. if len(path1) == len(path2):
  137. kernel_path = 1
  138. for i in range(0, len(path1)):
  139. # kernel = 1 if all corresponding nodes in the 2 paths have same labels, otherwise 0
  140. if G1.node[path1[i]][node_label] != G2.node[path2[i]][node_label]:
  141. kernel_path = 0
  142. break
  143. kernel += kernel_path
  144. kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average
  145. return kernel
  146. def _pathkernel_do_el(G1, G2, sp1, sp2, edge_label):
  147. """Calculate mean average path kernel between 2 edge-labeled graphs.
  148. """
  149. # calculate kernel
  150. kernel = 0
  151. for path1 in sp1:
  152. for path2 in sp2:
  153. if len(path1) == len(path2):
  154. if len(path1) == 0:
  155. kernel += 1
  156. else:
  157. kernel_path = 1
  158. for i in range(0, len(path1) - 1):
  159. # kernel = 1 if all corresponding edges in the 2 paths have same labels, otherwise 0
  160. if G1[path1[i]][path1[i + 1]][edge_label] != G2[path2[
  161. i]][path2[i + 1]][edge_label]:
  162. kernel_path = 0
  163. break
  164. kernel += kernel_path
  165. kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average
  166. return kernel
  167. def _pathkernel_do_unl(G1, G2, sp1, sp2):
  168. """Calculate mean average path kernel between 2 unlabeled graphs.
  169. """
  170. # calculate kernel
  171. kernel = 0
  172. for path1 in sp1:
  173. for path2 in sp2:
  174. if len(path1) == len(path2):
  175. kernel += 1
  176. kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average
  177. return kernel
  178. def get_shortest_paths(G, weight):
  179. """Get all shortest paths of a graph.
  180. Parameters
  181. ----------
  182. G : NetworkX graphs
  183. The graphs whose paths are calculated.
  184. weight : string/None
  185. edge attribute used as weight to calculate the shortest path.
  186. Return
  187. ------
  188. sp : list of list
  189. List of shortest paths of the graph, where each path is represented by a list of nodes.
  190. """
  191. sp = []
  192. for n1, n2 in itertools.combinations(G.nodes(), 2):
  193. try:
  194. sp.append(nx.shortest_path(G, n1, n2, weight=weight))
  195. except nx.NetworkXNoPath: # nodes not connected
  196. sp.append([])
  197. # add single nodes as length 0 paths.
  198. sp += [[n] for n in G.nodes()]
  199. return sp

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.