You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

marginalizedKernel.py 5.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. """
  2. @author: linlin
  3. @references:
  4. [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
  5. [2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
  6. """
  7. import sys
  8. import pathlib
  9. sys.path.insert(0, "../")
  10. import time
  11. import networkx as nx
  12. import numpy as np
  13. from matplotlib import pyplot as plt
  14. from tqdm import tqdm
  15. tqdm.monitor_interval = 0
  16. from pygraph.kernels.deltaKernel import deltakernel
  17. from pygraph.utils.utils import untotterTransformation
  18. def marginalizedkernel(*args,
  19. node_label='atom',
  20. edge_label='bond_type',
  21. p_quit=0.5,
  22. itr=20,
  23. remove_totters=True):
  24. """Calculate marginalized graph kernels between graphs.
  25. Parameters
  26. ----------
  27. Gn : List of NetworkX graph
  28. List of graphs between which the kernels are calculated.
  29. /
  30. G1, G2 : NetworkX graphs
  31. 2 graphs between which the kernel is calculated.
  32. node_label : string
  33. node attribute used as label. The default node label is atom.
  34. edge_label : string
  35. edge attribute used as label. The default edge label is bond_type.
  36. p_quit : integer
  37. the termination probability in the random walks generating step
  38. itr : integer
  39. time of iterations to calculate R_inf
  40. remove_totters : boolean
  41. whether to remove totters. The default value is True.
  42. Return
  43. ------
  44. Kmatrix : Numpy matrix
  45. Kernel matrix, each element of which is the marginalized kernel between 2 praphs.
  46. """
  47. # arrange all graphs in a list
  48. Gn = args[0] if len(args) == 1 else [args[0], args[1]]
  49. Kmatrix = np.zeros((len(Gn), len(Gn)))
  50. start_time = time.time()
  51. if remove_totters:
  52. Gn = [untotterTransformation(G, node_label, edge_label) for G in Gn]
  53. pbar = tqdm(
  54. total=(1 + len(Gn)) * len(Gn) / 2,
  55. desc='calculate kernels',
  56. file=sys.stdout)
  57. for i in range(0, len(Gn)):
  58. for j in range(i, len(Gn)):
  59. Kmatrix[i][j] = _marginalizedkernel_do(Gn[i], Gn[j], node_label,
  60. edge_label, p_quit, itr)
  61. Kmatrix[j][i] = Kmatrix[i][j]
  62. pbar.update(1)
  63. run_time = time.time() - start_time
  64. print(
  65. "\n --- marginalized kernel matrix of size %d built in %s seconds ---"
  66. % (len(Gn), run_time))
  67. return Kmatrix, run_time
  68. def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):
  69. """Calculate marginalized graph kernel between 2 graphs.
  70. Parameters
  71. ----------
  72. G1, G2 : NetworkX graphs
  73. 2 graphs between which the kernel is calculated.
  74. node_label : string
  75. node attribute used as label.
  76. edge_label : string
  77. edge attribute used as label.
  78. p_quit : integer
  79. the termination probability in the random walks generating step.
  80. itr : integer
  81. time of iterations to calculate R_inf.
  82. Return
  83. ------
  84. kernel : float
  85. Marginalized Kernel between 2 graphs.
  86. """
  87. # init parameters
  88. kernel = 0
  89. num_nodes_G1 = nx.number_of_nodes(G1)
  90. num_nodes_G2 = nx.number_of_nodes(G2)
  91. p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)
  92. p_init_G2 = 1 / num_nodes_G2
  93. q = p_quit * p_quit
  94. r1 = q
  95. # initial R_inf
  96. # matrix to save all the R_inf for all pairs of nodes
  97. R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
  98. # calculate R_inf with a simple interative method
  99. for i in range(1, itr):
  100. R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
  101. R_inf_new.fill(r1)
  102. # calculate R_inf for each pair of nodes
  103. for node1 in G1.nodes(data=True):
  104. neighbor_n1 = G1[node1[0]]
  105. # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)
  106. p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
  107. for node2 in G2.nodes(data=True):
  108. neighbor_n2 = G2[node2[0]]
  109. p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
  110. for neighbor1 in neighbor_n1:
  111. for neighbor2 in neighbor_n2:
  112. t = p_trans_n1 * p_trans_n2 * \
  113. deltakernel(G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label]) * \
  114. deltakernel(neighbor_n1[neighbor1][edge_label] == neighbor_n2[neighbor2][edge_label])
  115. R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][
  116. neighbor2] # ref [1] equation (8)
  117. R_inf[:] = R_inf_new
  118. # add elements of R_inf up and calculate kernel
  119. for node1 in G1.nodes(data=True):
  120. for node2 in G2.nodes(data=True):
  121. s = p_init_G1 * p_init_G2 * deltakernel(
  122. node1[1][node_label] == node2[1][node_label])
  123. kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
  124. return kernel

A Python package for graph kernels, graph edit distances and graph pre-image problem.