You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

marginalizedKernel.py 6.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. """
  2. @author: linlin
  3. @references:
  4. [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
  5. [2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and Jean-Philippe Vert. Extensions of marginalized graph kernels. In Proceedings of the twenty-first international conference on Machine learning, page 70. ACM, 2004.
  6. """
  7. import sys
  8. import pathlib
  9. sys.path.insert(0, "../")
  10. import time
  11. from tqdm import tqdm
  12. tqdm.monitor_interval = 0
  13. import networkx as nx
  14. import numpy as np
  15. from matplotlib import pyplot as plt
  16. from pygraph.kernels.deltaKernel import deltakernel
  17. from pygraph.utils.utils import untotterTransformation
  18. from pygraph.utils.graphdataset import get_dataset_attributes
  19. def marginalizedkernel(*args,
  20. node_label='atom',
  21. edge_label='bond_type',
  22. p_quit=0.5,
  23. itr=20,
  24. remove_totters=True):
  25. """Calculate marginalized graph kernels between graphs.
  26. Parameters
  27. ----------
  28. Gn : List of NetworkX graph
  29. List of graphs between which the kernels are calculated.
  30. /
  31. G1, G2 : NetworkX graphs
  32. 2 graphs between which the kernel is calculated.
  33. node_label : string
  34. node attribute used as label. The default node label is atom.
  35. edge_label : string
  36. edge attribute used as label. The default edge label is bond_type.
  37. p_quit : integer
  38. the termination probability in the random walks generating step
  39. itr : integer
  40. time of iterations to calculate R_inf
  41. remove_totters : boolean
  42. whether to remove totters. The default value is True.
  43. Return
  44. ------
  45. Kmatrix : Numpy matrix
  46. Kernel matrix, each element of which is the marginalized kernel between 2 praphs.
  47. """
  48. # arrange all graphs in a list
  49. Gn = args[0] if len(args) == 1 else [args[0], args[1]]
  50. Kmatrix = np.zeros((len(Gn), len(Gn)))
  51. ds_attrs = get_dataset_attributes(
  52. Gn,
  53. attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
  54. node_label=node_label,
  55. edge_label=edge_label)
  56. if not ds_attrs['node_labeled']:
  57. for G in Gn:
  58. nx.set_node_attributes(G, '0', 'atom')
  59. if not ds_attrs['edge_labeled']:
  60. for G in Gn:
  61. nx.set_edge_attributes(G, '0', 'bond_type')
  62. start_time = time.time()
  63. if remove_totters:
  64. Gn = [
  65. untotterTransformation(G, node_label, edge_label)
  66. for G in tqdm(Gn, desc='removing tottering', file=sys.stdout)
  67. ]
  68. pbar = tqdm(
  69. total=(1 + len(Gn)) * len(Gn) / 2,
  70. desc='calculating kernels',
  71. file=sys.stdout)
  72. for i in range(0, len(Gn)):
  73. for j in range(i, len(Gn)):
  74. Kmatrix[i][j] = _marginalizedkernel_do(Gn[i], Gn[j], node_label,
  75. edge_label, p_quit, itr)
  76. Kmatrix[j][i] = Kmatrix[i][j]
  77. pbar.update(1)
  78. run_time = time.time() - start_time
  79. print(
  80. "\n --- marginalized kernel matrix of size %d built in %s seconds ---"
  81. % (len(Gn), run_time))
  82. return Kmatrix, run_time
  83. def _marginalizedkernel_do(G1, G2, node_label, edge_label, p_quit, itr):
  84. """Calculate marginalized graph kernel between 2 graphs.
  85. Parameters
  86. ----------
  87. G1, G2 : NetworkX graphs
  88. 2 graphs between which the kernel is calculated.
  89. node_label : string
  90. node attribute used as label.
  91. edge_label : string
  92. edge attribute used as label.
  93. p_quit : integer
  94. the termination probability in the random walks generating step.
  95. itr : integer
  96. time of iterations to calculate R_inf.
  97. Return
  98. ------
  99. kernel : float
  100. Marginalized Kernel between 2 graphs.
  101. """
  102. # init parameters
  103. kernel = 0
  104. num_nodes_G1 = nx.number_of_nodes(G1)
  105. num_nodes_G2 = nx.number_of_nodes(G2)
  106. p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)
  107. p_init_G2 = 1 / num_nodes_G2
  108. q = p_quit * p_quit
  109. r1 = q
  110. # initial R_inf
  111. # matrix to save all the R_inf for all pairs of nodes
  112. R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
  113. # calculate R_inf with a simple interative method
  114. for i in range(1, itr):
  115. R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
  116. R_inf_new.fill(r1)
  117. # calculate R_inf for each pair of nodes
  118. for node1 in G1.nodes(data=True):
  119. neighbor_n1 = G1[node1[0]]
  120. # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)
  121. p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
  122. for node2 in G2.nodes(data=True):
  123. neighbor_n2 = G2[node2[0]]
  124. p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
  125. for neighbor1 in neighbor_n1:
  126. for neighbor2 in neighbor_n2:
  127. t = p_trans_n1 * p_trans_n2 * \
  128. deltakernel(G1.node[neighbor1][node_label] == G2.node[neighbor2][node_label]) * \
  129. deltakernel(neighbor_n1[neighbor1][edge_label] == neighbor_n2[neighbor2][edge_label])
  130. R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][
  131. neighbor2] # ref [1] equation (8)
  132. R_inf[:] = R_inf_new
  133. # add elements of R_inf up and calculate kernel
  134. for node1 in G1.nodes(data=True):
  135. for node2 in G2.nodes(data=True):
  136. s = p_init_G1 * p_init_G2 * deltakernel(
  137. node1[1][node_label] == node2[1][node_label])
  138. kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
  139. return kernel

A Python package for graph kernels, graph edit distances and graph pre-image problem.