You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

marginalizedKernel.py 4.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. import sys
  2. import pathlib
  3. sys.path.insert(0, "../")
  4. import networkx as nx
  5. import numpy as np
  6. import time
  7. def marginalizedkernel(*args):
  8. """Calculate marginalized graph kernels between graphs.
  9. Parameters
  10. ----------
  11. Gn : List of NetworkX graph
  12. List of graphs between which the kernels are calculated.
  13. /
  14. G1, G2 : NetworkX graphs
  15. 2 graphs between which the kernel is calculated.
  16. p_quit : integer
  17. the termination probability in the random walks generating step
  18. itr : integer
  19. time of iterations to calculate R_inf
  20. Return
  21. ------
  22. Kmatrix/Kernel : Numpy matrix/int
  23. Kernel matrix, each element of which is the marginalized kernel between 2 praphs. / Marginalized Kernel between 2 graphs.
  24. References
  25. ----------
  26. [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
  27. """
  28. if len(args) == 3: # for a list of graphs
  29. Gn = args[0]
  30. Kmatrix = np.zeros((len(Gn), len(Gn)))
  31. start_time = time.time()
  32. for i in range(0, len(Gn)):
  33. for j in range(i, len(Gn)):
  34. Kmatrix[i][j] = marginalizedkernel(Gn[i], Gn[j], args[1], args[2])
  35. Kmatrix[j][i] = Kmatrix[i][j]
  36. print("\n --- marginalized kernel matrix of size %d built in %s seconds ---" % (len(Gn), (time.time() - start_time)))
  37. return Kmatrix
  38. else: # for only 2 graphs
  39. # init parameters
  40. G1 = args[0]
  41. G2 = args[1]
  42. p_quit = args[2] # the termination probability in the random walks generating step
  43. itr = args[3] # time of iterations to calculate R_inf
  44. kernel = 0
  45. num_nodes_G1 = nx.number_of_nodes(G1)
  46. num_nodes_G2 = nx.number_of_nodes(G2)
  47. p_init_G1 = 1 / num_nodes_G1 # the initial probability distribution in the random walks generating step (uniform distribution over |G|)
  48. p_init_G2 = 1 / num_nodes_G2
  49. q = p_quit * p_quit
  50. r1 = q
  51. # initial R_inf
  52. R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # matrix to save all the R_inf for all pairs of nodes
  53. # calculate R_inf with a simple interative method
  54. for i in range(1, itr):
  55. R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
  56. R_inf_new.fill(r1)
  57. # calculate R_inf for each pair of nodes
  58. for node1 in G1.nodes(data = True):
  59. neighbor_n1 = G1[node1[0]]
  60. p_trans_n1 = (1 - p_quit) / len(neighbor_n1) # the transition probability distribution in the random walks generating step (uniform distribution over the vertices adjacent to the current vertex)
  61. for node2 in G2.nodes(data = True):
  62. neighbor_n2 = G2[node2[0]]
  63. p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
  64. for neighbor1 in neighbor_n1:
  65. for neighbor2 in neighbor_n2:
  66. t = p_trans_n1 * p_trans_n2 * \
  67. deltaKernel(G1.node[neighbor1]['label'] == G2.node[neighbor2]['label']) * \
  68. deltaKernel(neighbor_n1[neighbor1]['label'] == neighbor_n2[neighbor2]['label'])
  69. R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][neighbor2] # ref [1] equation (8)
  70. R_inf[:] = R_inf_new
  71. # add elements of R_inf up and calculate kernel
  72. for node1 in G1.nodes(data = True):
  73. for node2 in G2.nodes(data = True):
  74. s = p_init_G1 * p_init_G2 * deltaKernel(node1[1]['label'] == node2[1]['label'])
  75. kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
  76. return kernel
  77. def deltaKernel(condition):
  78. """Return 1 if condition holds, 0 otherwise.
  79. Parameters
  80. ----------
  81. condition : Boolean
  82. A condition, according to which the kernel is set to 1 or 0.
  83. Return
  84. ------
  85. Kernel : integer
  86. Delta Kernel.
  87. References
  88. ----------
  89. [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003.
  90. """
  91. return (1 if condition else 0)

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.