You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

pathKernel.py 4.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. import sys
  2. import pathlib
  3. sys.path.insert(0, "../")
  4. import networkx as nx
  5. import numpy as np
  6. import time
  7. from pygraph.kernels.deltaKernel import deltakernel
  8. def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'):
  9. """Calculate mean average path kernels between graphs.
  10. Parameters
  11. ----------
  12. Gn : List of NetworkX graph
  13. List of graphs between which the kernels are calculated.
  14. /
  15. G1, G2 : NetworkX graphs
  16. 2 graphs between which the kernel is calculated.
  17. node_label : string
  18. node attribute used as label. The default node label is atom.
  19. edge_label : string
  20. edge attribute used as label. The default edge label is bond_type.
  21. Return
  22. ------
  23. Kmatrix/kernel : Numpy matrix/float
  24. Kernel matrix, each element of which is the path kernel between 2 praphs. / Path kernel between 2 graphs.
  25. References
  26. ----------
  27. [1] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. InESANN 2007 Apr 25 (pp. 355-360).
  28. """
  29. some_graph = args[0][0] if len(args) == 1 else args[0] # only edge attributes of type int or float can be used as edge weight to calculate the shortest paths.
  30. some_weight = list(nx.get_edge_attributes(some_graph, edge_label).values())[0]
  31. weight = edge_label if isinstance(some_weight, float) or isinstance(some_weight, int) else None
  32. if len(args) == 1: # for a list of graphs
  33. Gn = args[0]
  34. Kmatrix = np.zeros((len(Gn), len(Gn)))
  35. start_time = time.time()
  36. for i in range(0, len(Gn)):
  37. for j in range(i, len(Gn)):
  38. Kmatrix[i][j] = _pathkernel_do(Gn[i], Gn[j], node_label, edge_label, weight = weight)
  39. Kmatrix[j][i] = Kmatrix[i][j]
  40. run_time = time.time() - start_time
  41. print("\n --- mean average path kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time))
  42. return Kmatrix, run_time
  43. else: # for only 2 graphs
  44. start_time = time.time()
  45. kernel = _pathkernel_do(args[0], args[1], node_label, edge_label, weight = weight)
  46. run_time = time.time() - start_time
  47. print("\n --- mean average path kernel built in %s seconds ---" % (run_time))
  48. return kernel, run_time
  49. def _pathkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', weight = None):
  50. """Calculate mean average path kernel between 2 graphs.
  51. Parameters
  52. ----------
  53. G1, G2 : NetworkX graphs
  54. 2 graphs between which the kernel is calculated.
  55. node_label : string
  56. node attribute used as label. The default node label is atom.
  57. edge_label : string
  58. edge attribute used as label. The default edge label is bond_type.
  59. weight : string/None
  60. edge attribute used as weight to calculate the shortest path. The default edge label is None.
  61. Return
  62. ------
  63. kernel : float
  64. Path Kernel between 2 graphs.
  65. """
  66. # calculate shortest paths for both graphs
  67. sp1 = []
  68. num_nodes = G1.number_of_nodes()
  69. for node1 in range(num_nodes):
  70. for node2 in range(node1 + 1, num_nodes):
  71. sp1.append(nx.shortest_path(G1, node1, node2, weight = weight))
  72. sp2 = []
  73. num_nodes = G2.number_of_nodes()
  74. for node1 in range(num_nodes):
  75. for node2 in range(node1 + 1, num_nodes):
  76. sp2.append(nx.shortest_path(G2, node1, node2, weight = weight))
  77. # calculate kernel
  78. kernel = 0
  79. for path1 in sp1:
  80. for path2 in sp2:
  81. if len(path1) == len(path2):
  82. kernel_path = deltakernel(G1.node[path1[0]][node_label] == G2.node[path2[0]][node_label])
  83. if kernel_path:
  84. for i in range(1, len(path1)):
  85. # kernel = 1 if all corresponding nodes and edges in the 2 paths have same labels, otherwise 0
  86. kernel_path *= deltakernel(G1[path1[i - 1]][path1[i]][edge_label] == G2[path2[i - 1]][path2[i]][edge_label]) * deltakernel(G1.node[path1[i]][node_label] == G2.node[path2[i]][node_label])
  87. kernel += kernel_path # add up kernels of all paths
  88. kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average
  89. return kernel

A Python package for graph kernels, graph edit distances and graph pre-image problem.