You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

marginalizedKernel.py 5.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. import sys
  2. import pathlib
  3. sys.path.insert(0, "../")
  4. import networkx as nx
  5. import numpy as np
  6. import time
  7. from pygraph.kernels.deltaKernel import deltakernel
  8. def marginalizedkernel(*args, node_label = 'atom', edge_label = 'bond_type'):
  9. """Calculate marginalized graph kernels between graphs.
  10. Parameters
  11. ----------
  12. Gn : List of NetworkX graph
  13. List of graphs between which the kernels are calculated.
  14. /
  15. G1, G2 : NetworkX graphs
  16. 2 graphs between which the kernel is calculated.
  17. p_quit : integer
  18. the termination probability in the random walks generating step
  19. itr : integer
  20. time of iterations to calculate R_inf
  21. node_label : string
  22. node attribute used as label. The default node label is atom.
  23. edge_label : string
  24. edge attribute used as label. The default edge label is bond_type.
  25. Return
  26. ------
  27. Kmatrix/Kernel : Numpy matrix/int
  28. Kernel matrix, each element of which is the marginalized kernel between 2 praphs. / Marginalized Kernel between 2 graphs.
  29. References
  30. ----------
  31. [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
  32. """
  33. if len(args) == 3: # for a list of graphs
  34. Gn = args[0]
  35. Kmatrix = np.zeros((len(Gn), len(Gn)))
  36. start_time = time.time()
  37. for i in range(0, len(Gn)):
  38. for j in range(i, len(Gn)):
  39. Kmatrix[i][j] = _marginalizedkernel_do(Gn[i], Gn[j], node_label, edge_label, args[1], args[2])
  40. Kmatrix[j][i] = Kmatrix[i][j]
  41. run_time = time.time() - start_time
  42. print("\n --- marginalized kernel matrix of size %d built in %s seconds ---" % (len(Gn), run_time))
  43. return Kmatrix, run_time
  44. else: # for only 2 graphs
  45. start_time = time.time()
  46. kernel = _marginalizedkernel_do(args[0], args[1], node_label, edge_label, args[2], args[3])
  47. run_time = time.time() - start_time
  48. print("\n --- marginalized kernel built in %s seconds ---" % (run_time))
  49. return kernel, run_time
  50. def _marginalizedkernel_do(G1, G2, node_label = 'atom', edge_label = 'bond_type', p_quit, itr):
  51. """Calculate marginalized graph kernels between 2 graphs.
  52. Parameters
  53. ----------
  54. G1, G2 : NetworkX graphs
  55. 2 graphs between which the kernel is calculated.
  56. node_label : string
  57. node attribute used as label. The default node label is atom.
  58. edge_label : string
  59. edge attribute used as label. The default edge label is bond_type.
  60. p_quit : integer
  61. the termination probability in the random walks generating step
  62. itr : integer
  63. time of iterations to calculate R_inf
  64. Return
  65. ------
  66. Kernel : int
  67. Marginalized Kernel between 2 graphs.
  68. """
  69. # init parameters
  70. kernel = 0
  71. num_nodes_G1 = nx.number_of_nodes(G1)
  72. num_nodes_G2 = nx.number_of_nodes(G2)
  73. p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)
  74. p_init_G2 = 1 / num_nodes_G2
  75. q = p_quit * p_quit
  76. r1 = q
  77. # initial R_inf
  78. R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # matrix to save all the R_inf for all pairs of nodes
  79. # calculate R_inf with a simple interative method
  80. for i in range(1, itr):
  81. R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
  82. R_inf_new.fill(r1)
  83. # calculate R_inf for each pair of nodes
  84. for node1 in G1.nodes(data = True):
  85. neighbor_n1 = G1[node1[0]]
  86. p_trans_n1 = (1 - p_quit) / len(neighbor_n1) # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)
  87. for node2 in G2.nodes(data = True):
  88. neighbor_n2 = G2[node2[0]]
  89. p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
  90. for neighbor1 in neighbor_n1:
  91. for neighbor2 in neighbor_n2:
  92. t = p_trans_n1 * p_trans_n2 * \
  93. deltakernel(G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label]) * \
  94. deltakernel(neighbor_n1[neighbor1][edge_label] == neighbor_n2[neighbor2][edge_label])
  95. R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][neighbor2] # ref [1] equation (8)
  96. R_inf[:] = R_inf_new
  97. # add elements of R_inf up and calculate kernel
  98. for node1 in G1.nodes(data = True):
  99. for node2 in G2.nodes(data = True):
  100. s = p_init_G1 * p_init_G2 * deltakernel(node1[1][node_label] == node2[1][node_label])
  101. kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
  102. return kernel

A Python package for graph kernels, graph edit distances and graph pre-image problem.