You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

pathKernel.py 3.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. import sys
  2. import pathlib
  3. sys.path.insert(0, "../")
  4. import networkx as nx
  5. import numpy as np
  6. import time
  7. from pygraph.kernels.deltaKernel import deltakernel
  8. def pathkernel(*args, node_label = 'atom', edge_label = 'bond_type'):
  9. """Calculate mean average path kernels between graphs.
  10. Parameters
  11. ----------
  12. Gn : List of NetworkX graph
  13. List of graphs between which the kernels are calculated.
  14. /
  15. G1, G2 : NetworkX graphs
  16. 2 graphs between which the kernel is calculated.
  17. node_label : string
  18. node attribute used as label. The default node label is atom.
  19. edge_label : string
  20. edge attribute used as label. The default edge label is bond_type.
  21. Return
  22. ------
  23. Kmatrix/Kernel : Numpy matrix/int
  24. Kernel matrix, each element of which is the path kernel between 2 praphs. / Path Kernel between 2 graphs.
  25. References
  26. ----------
  27. [1] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For Measuring Similarity of Shapes. InESANN 2007 Apr 25 (pp. 355-360).
  28. """
  29. if len(args) == 1: # for a list of graphs
  30. Gn = args[0]
  31. Kmatrix = np.zeros((len(Gn), len(Gn)))
  32. start_time = time.time()
  33. for i in range(0, len(Gn)):
  34. for j in range(i, len(Gn)):
  35. Kmatrix[i][j] = _pathkernel_do(Gn[i], Gn[j], node_label, edge_label)
  36. Kmatrix[j][i] = Kmatrix[i][j]
  37. run_time = time.time() - start_time
  38. print("\n --- mean average path kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time))
  39. return Kmatrix, run_time
  40. else: # for only 2 graphs
  41. start_time = time.time()
  42. kernel = _pathkernel_do(args[0], args[1], node_label, edge_label)
  43. run_time = time.time() - start_time
  44. print("\n --- mean average path kernel built in %s seconds ---" % (run_time))
  45. return kernel, run_time
  46. def _pathkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type'):
  47. """Calculate mean average path kernels between 2 graphs.
  48. Parameters
  49. ----------
  50. G1, G2 : NetworkX graphs
  51. 2 graphs between which the kernel is calculated.
  52. node_label : string
  53. node attribute used as label. The default node label is atom.
  54. edge_label : string
  55. edge attribute used as label. The default edge label is bond_type.
  56. Return
  57. ------
  58. Kernel : int
  59. Path Kernel between 2 graphs.
  60. """
  61. # calculate shortest paths for both graphs
  62. sp1 = []
  63. num_nodes = G1.number_of_nodes()
  64. for node1 in range(num_nodes):
  65. for node2 in range(node1 + 1, num_nodes):
  66. sp1.append(nx.shortest_path(G1, node1, node2, weight = edge_label))
  67. sp2 = []
  68. num_nodes = G2.number_of_nodes()
  69. for node1 in range(num_nodes):
  70. for node2 in range(node1 + 1, num_nodes):
  71. sp2.append(nx.shortest_path(G2, node1, node2, weight = edge_label))
  72. # calculate kernel
  73. kernel = 0
  74. for path1 in sp1:
  75. for path2 in sp2:
  76. if len(path1) == len(path2):
  77. kernel_path = deltakernel(G1.node[path1[0]][node_label] == G2.node[path2[0]][node_label])
  78. if kernel_path:
  79. for i in range(1, len(path1)):
  80. # kernel = 1 if all corresponding nodes and edges in the 2 paths have same labels, otherwise 0
  81. kernel_path *= deltakernel(G1[path1[i - 1]][path1[i]][edge_label] == G2[path2[i - 1]][path2[i]][edge_label]) * deltakernel(G1.node[path1[i]][node_label] == G2.node[path2[i]][node_label])
  82. kernel += kernel_path # add up kernels of all paths
  83. kernel = kernel / (len(sp1) * len(sp2)) # calculate mean average
  84. return kernel

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.