You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

randomWalkKernel.py 5.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. """
  2. @author: linlin
  3. @references: S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
  4. """
  5. import sys
  6. import pathlib
  7. sys.path.insert(0, "../")
  8. import time
  9. from tqdm import tqdm
  10. # from collections import Counter
  11. import networkx as nx
  12. import numpy as np
  13. from pygraph.utils.graphdataset import get_dataset_attributes
  14. def randomwalkkernel(*args,
  15. node_label='atom',
  16. edge_label='bond_type',
  17. h=10,
  18. compute_method=''):
  19. """Calculate random walk graph kernels.
  20. Parameters
  21. ----------
  22. Gn : List of NetworkX graph
  23. List of graphs between which the kernels are calculated.
  24. /
  25. G1, G2 : NetworkX graphs
  26. 2 graphs between which the kernel is calculated.
  27. node_label : string
  28. node attribute used as label. The default node label is atom.
  29. edge_label : string
  30. edge attribute used as label. The default edge label is bond_type.
  31. n : integer
  32. Longest length of walks.
  33. method : string
  34. Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'.
  35. Return
  36. ------
  37. Kmatrix : Numpy matrix
  38. Kernel matrix, each element of which is the path kernel up to d between 2 praphs.
  39. """
  40. compute_method = compute_method.lower()
  41. h = int(h)
  42. Gn = args[0] if len(args) == 1 else [args[0], args[1]]
  43. Kmatrix = np.zeros((len(Gn), len(Gn)))
  44. ds_attrs = get_dataset_attributes(
  45. Gn,
  46. attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
  47. node_label=node_label,
  48. edge_label=edge_label)
  49. if not ds_attrs['node_labeled']:
  50. for G in Gn:
  51. nx.set_node_attributes(G, '0', 'atom')
  52. if not ds_attrs['edge_labeled']:
  53. for G in Gn:
  54. nx.set_edge_attributes(G, '0', 'bond_type')
  55. start_time = time.time()
  56. # # get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset.
  57. # all_walks = [
  58. # find_all_walks_until_length(
  59. # Gn[i],
  60. # n,
  61. # node_label=node_label,
  62. # edge_label=edge_label,
  63. # labeled=labeled) for i in range(0, len(Gn))
  64. # ]
  65. pbar = tqdm(
  66. total=(1 + len(Gn)) * len(Gn) / 2,
  67. desc='calculating kernels',
  68. file=sys.stdout)
  69. if compute_method == 'sylvester':
  70. import warnings
  71. warnings.warn(
  72. 'The Sylvester equation (rather than generalized Sylvester equation) is used; only walks of length 1 is considered.'
  73. )
  74. from control import dlyap
  75. for i in range(0, len(Gn)):
  76. for j in range(i, len(Gn)):
  77. Kmatrix[i][j] = _randomwalkkernel_sylvester(
  78. all_walks[i],
  79. all_walks[j],
  80. node_label=node_label,
  81. edge_label=edge_label)
  82. Kmatrix[j][i] = Kmatrix[i][j]
  83. pbar.update(1)
  84. elif compute_method == 'conjugate':
  85. pass
  86. elif compute_method == 'fp':
  87. pass
  88. elif compute_method == 'spectral':
  89. pass
  90. elif compute_method == 'kron':
  91. pass
  92. else:
  93. raise Exception(
  94. 'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".'
  95. )
  96. for i in range(0, len(Gn)):
  97. for j in range(i, len(Gn)):
  98. Kmatrix[i][j] = _randomwalkkernel_do(
  99. all_walks[i],
  100. all_walks[j],
  101. node_label=node_label,
  102. edge_label=edge_label,
  103. labeled=labeled)
  104. Kmatrix[j][i] = Kmatrix[i][j]
  105. run_time = time.time() - start_time
  106. print(
  107. "\n --- kernel matrix of walk kernel up to %d of size %d built in %s seconds ---"
  108. % (n, len(Gn), run_time))
  109. return Kmatrix, run_time
  110. def _randomwalkkernel_sylvester(walks1,
  111. walks2,
  112. node_label='atom',
  113. edge_label='bond_type'):
  114. """Calculate walk graph kernels up to n between 2 graphs using Sylvester method.
  115. Parameters
  116. ----------
  117. walks1, walks2 : list
  118. List of walks in 2 graphs, where for unlabeled graphs, each walk is represented by a list of nodes; while for labeled graphs, each walk is represented by a string consists of labels of nodes and edges on that walk.
  119. node_label : string
  120. node attribute used as label. The default node label is atom.
  121. edge_label : string
  122. edge attribute used as label. The default edge label is bond_type.
  123. Return
  124. ------
  125. kernel : float
  126. Treelet Kernel between 2 graphs.
  127. """
  128. dpg = nx.tensor_product(G1, G2) # direct product graph
  129. X = dlyap(A, Q, C)
  130. return kernel

A Python package for graph kernels, graph edit distances and graph pre-image problem.