You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

pathKernel.py 5.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. """
  2. @author: linlin
  3. @references: Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. In ESANN 2007 Apr 25 (pp. 355-360).
  4. """
  5. import sys
  6. import pathlib
  7. sys.path.insert(0, "../")
  8. import networkx as nx
  9. import numpy as np
  10. import time
  11. from pygraph.kernels.deltaKernel import deltakernel
  12. def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'):
  13. """Calculate mean average path kernels between graphs.
  14. Parameters
  15. ----------
  16. Gn : List of NetworkX graph
  17. List of graphs between which the kernels are calculated.
  18. /
  19. G1, G2 : NetworkX graphs
  20. 2 graphs between which the kernel is calculated.
  21. node_label : string
  22. node attribute used as label. The default node label is atom.
  23. edge_label : string
  24. edge attribute used as label. The default edge label is bond_type.
  25. Return
  26. ------
  27. Kmatrix/kernel : Numpy matrix/float
  28. Kernel matrix, each element of which is the path kernel between 2 praphs. / Path kernel between 2 graphs.
  29. """
  30. some_graph = args[0][0] if len(args) == 1 else args[0] # only edge attributes of type int or float can be used as edge weight to calculate the shortest paths.
  31. some_weight = list(nx.get_edge_attributes(some_graph, edge_label).values())[0]
  32. weight = edge_label if isinstance(some_weight, float) or isinstance(some_weight, int) else None
  33. if len(args) == 1: # for a list of graphs
  34. Gn = args[0]
  35. Kmatrix = np.zeros((len(Gn), len(Gn)))
  36. start_time = time.time()
  37. splist = [ get_shortest_paths(Gn[i], weight) for i in range(0, len(Gn)) ]
  38. for i in range(0, len(Gn)):
  39. for j in range(i, len(Gn)):
  40. Kmatrix[i][j] = _pathkernel_do(Gn[i], Gn[j], splist[i], splist[j], node_label, edge_label)
  41. Kmatrix[j][i] = Kmatrix[i][j]
  42. run_time = time.time() - start_time
  43. print("\n --- mean average path kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time))
  44. return Kmatrix, run_time
  45. else: # for only 2 graphs
  46. start_time = time.time()
  47. splist = get_shortest_paths(args[0], weight)
  48. splist = get_shortest_paths(args[1], weight)
  49. kernel = _pathkernel_do(args[0], args[1], sp1, sp2, node_label, edge_label)
  50. run_time = time.time() - start_time
  51. print("\n --- mean average path kernel built in %s seconds ---" % (run_time))
  52. return kernel, run_time
  53. def _pathkernel_do(G1, G2, sp1, sp2, node_label = 'atom', edge_label = 'bond_type'):
  54. """Calculate mean average path kernel between 2 graphs.
  55. Parameters
  56. ----------
  57. G1, G2 : NetworkX graphs
  58. 2 graphs between which the kernel is calculated.
  59. sp1, sp2 : list of list
  60. List of shortest paths of 2 graphs, where each path is represented by a list of nodes.
  61. node_label : string
  62. node attribute used as label. The default node label is atom.
  63. edge_label : string
  64. edge attribute used as label. The default edge label is bond_type.
  65. Return
  66. ------
  67. kernel : float
  68. Path Kernel between 2 graphs.
  69. """
  70. # calculate shortest paths for both graphs
  71. # calculate kernel
  72. kernel = 0
  73. for path1 in sp1:
  74. for path2 in sp2:
  75. if len(path1) == len(path2):
  76. kernel_path = (G1.node[path1[0]][node_label] == G2.node[path2[0]][node_label])
  77. if kernel_path:
  78. for i in range(1, len(path1)):
  79. # kernel = 1 if all corresponding nodes and edges in the 2 paths have same labels, otherwise 0
  80. kernel_path *= (G1[path1[i - 1]][path1[i]][edge_label] == G2[path2[i - 1]][path2[i]][edge_label]) \
  81. * (G1.node[path1[i]][node_label] == G2.node[path2[i]][node_label])
  82. if kernel_path == 0:
  83. break
  84. kernel += kernel_path # add up kernels of all paths
  85. # kernel = 0
  86. # for path1 in sp1:
  87. # for path2 in sp2:
  88. # if len(path1) == len(path2):
  89. # if (G1.node[path1[0]][node_label] == G2.node[path2[0]][node_label]):
  90. # for i in range(1, len(path1)):
  91. # # kernel = 1 if all corresponding nodes and edges in the 2 paths have same labels, otherwise 0
  92. # # kernel_path *= (G1[path1[i - 1]][path1[i]][edge_label] == G2[path2[i - 1]][path2[i]][edge_label]) \
  93. # # * (G1.node[path1[i]][node_label] == G2.node[path2[i]][node_label])
  94. # # if kernel_path == 0:
  95. # # break
  96. # # kernel += kernel_path # add up kernels of all paths
  97. # if (G1[path1[i - 1]][path1[i]][edge_label] != G2[path2[i - 1]][path2[i]][edge_label]) or \
  98. # (G1.node[path1[i]][node_label] != G2.node[path2[i]][node_label]):
  99. # break
  100. # else:
  101. # kernel += 1
  102. kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average
  103. return kernel
  104. def get_shortest_paths(G, weight):
  105. """Get all shortest paths of a graph.
  106. Parameters
  107. ----------
  108. G : NetworkX graphs
  109. The graphs whose paths are calculated.
  110. weight : string/None
  111. edge attribute used as weight to calculate the shortest path.
  112. Return
  113. ------
  114. sp : list of list
  115. List of shortest paths of the graph, where each path is represented by a list of nodes.
  116. """
  117. sp = []
  118. num_nodes = G.number_of_nodes()
  119. for node1 in range(num_nodes):
  120. for node2 in range(node1 + 1, num_nodes):
  121. sp.append(nx.shortest_path(G, node1, node2, weight = weight))
  122. return sp

A Python package for graph kernels, graph edit distances and graph pre-image problem.