You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

randomWalkKernel.py 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. """
  2. @author: linlin
  3. @references: S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
  4. """
  5. import sys
  6. import pathlib
  7. sys.path.insert(0, "../")
  8. import time
  9. from tqdm import tqdm
  10. # from collections import Counter
  11. import networkx as nx
  12. import numpy as np
  13. from pygraph.utils.graphdataset import get_dataset_attributes
  14. def randomwalkkernel(*args,
  15. node_label='atom',
  16. edge_label='bond_type',
  17. edge_weight=None,
  18. h=10,
  19. p=None,
  20. q=None,
  21. weight=None,
  22. compute_method=''):
  23. """Calculate random walk graph kernels.
  24. Parameters
  25. ----------
  26. Gn : List of NetworkX graph
  27. List of graphs between which the kernels are calculated.
  28. /
  29. G1, G2 : NetworkX graphs
  30. 2 graphs between which the kernel is calculated.
  31. node_label : string
  32. node attribute used as label. The default node label is atom.
  33. edge_label : string
  34. edge attribute used as label. The default edge label is bond_type.
  35. h : integer
  36. Longest length of walks.
  37. method : string
  38. Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'.
  39. Return
  40. ------
  41. Kmatrix : Numpy matrix
  42. Kernel matrix, each element of which is the path kernel up to d between 2 praphs.
  43. """
  44. compute_method = compute_method.lower()
  45. h = int(h)
  46. Gn = args[0] if len(args) == 1 else [args[0], args[1]]
  47. eweight = None
  48. if edge_weight == None:
  49. print('\n None edge weight specified. Set all weight to 1.\n')
  50. else:
  51. try:
  52. some_weight = list(
  53. nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
  54. if isinstance(some_weight, float) or isinstance(some_weight, int):
  55. eweight = edge_weight
  56. else:
  57. print(
  58. '\n Edge weight with name %s is not float or integer. Set all weight to 1.\n'
  59. % edge_weight)
  60. except:
  61. print(
  62. '\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n'
  63. % edge_weight)
  64. ds_attrs = get_dataset_attributes(
  65. Gn,
  66. attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
  67. node_label=node_label,
  68. edge_label=edge_label)
  69. if not ds_attrs['node_labeled']:
  70. for G in Gn:
  71. nx.set_node_attributes(G, '0', 'atom')
  72. if not ds_attrs['edge_labeled']:
  73. for G in Gn:
  74. nx.set_edge_attributes(G, '0', 'bond_type')
  75. start_time = time.time()
  76. # # get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset.
  77. # all_walks = [
  78. # find_all_walks_until_length(
  79. # Gn[i],
  80. # n,
  81. # node_label=node_label,
  82. # edge_label=edge_label,
  83. # labeled=labeled) for i in range(0, len(Gn))
  84. # ]
  85. if compute_method == 'sylvester':
  86. import warnings
  87. warnings.warn(
  88. 'The Sylvester equation (rather than generalized Sylvester equation) is used; edge label number has to smaller than 3.'
  89. )
  90. Kmatrix = _randomwalkkernel_sylvester(Gn, weight, p, q, node_label,
  91. edge_label, eweight)
  92. elif compute_method == 'conjugate':
  93. for i in range(0, len(Gn)):
  94. for j in range(i, len(Gn)):
  95. Kmatrix[i][j] = _randomwalkkernel_conjugate(
  96. Gn[i], Gn[j], node_label, edge_label)
  97. Kmatrix[j][i] = Kmatrix[i][j]
  98. pbar.update(1)
  99. elif compute_method == 'fp':
  100. for i in range(0, len(Gn)):
  101. for j in range(i, len(Gn)):
  102. Kmatrix[i][j] = _randomwalkkernel_fp(Gn[i], Gn[j], node_label,
  103. edge_label)
  104. Kmatrix[j][i] = Kmatrix[i][j]
  105. pbar.update(1)
  106. elif compute_method == 'spectral':
  107. for i in range(0, len(Gn)):
  108. for j in range(i, len(Gn)):
  109. Kmatrix[i][j] = _randomwalkkernel_spectral(
  110. Gn[i], Gn[j], node_label, edge_label)
  111. Kmatrix[j][i] = Kmatrix[i][j]
  112. pbar.update(1)
  113. elif compute_method == 'kron':
  114. for i in range(0, len(Gn)):
  115. for j in range(i, len(Gn)):
  116. Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j],
  117. node_label, edge_label)
  118. Kmatrix[j][i] = Kmatrix[i][j]
  119. pbar.update(1)
  120. else:
  121. raise Exception(
  122. 'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".'
  123. )
  124. # for i in range(0, len(Gn)):
  125. # for j in range(i, len(Gn)):
  126. # Kmatrix[i][j] = _randomwalkkernel_do(
  127. # all_walks[i],
  128. # all_walks[j],
  129. # node_label=node_label,
  130. # edge_label=edge_label,
  131. # labeled=labeled)
  132. # Kmatrix[j][i] = Kmatrix[i][j]
  133. run_time = time.time() - start_time
  134. print(
  135. "\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---"
  136. % (len(Gn), run_time))
  137. return Kmatrix, run_time
  138. def _randomwalkkernel_sylvester(Gn, lmda, p, q, node_label, edge_label,
  139. eweight):
  140. """Calculate walk graph kernels up to n between 2 graphs using Sylvester method.
  141. Parameters
  142. ----------
  143. G1, G2 : NetworkX graph
  144. Graphs between which the kernel is calculated.
  145. node_label : string
  146. node attribute used as label.
  147. edge_label : string
  148. edge attribute used as label.
  149. Return
  150. ------
  151. kernel : float
  152. Kernel between 2 graphs.
  153. """
  154. from control import dlyap
  155. Kmatrix = np.zeros((len(Gn), len(Gn)))
  156. if q == None:
  157. # don't normalize adjacency matrices if q is a uniform vector.
  158. A_list = [
  159. nx.adjacency_matrix(G, eweight).todense() for G in tqdm(
  160. Gn, desc='compute adjacency matrices', file=sys.stdout)
  161. ]
  162. if p == None:
  163. pbar = tqdm(
  164. total=(1 + len(Gn)) * len(Gn) / 2,
  165. desc='calculating kernels',
  166. file=sys.stdout)
  167. for i in range(0, len(Gn)):
  168. for j in range(i, len(Gn)):
  169. A = lmda * A_list[j]
  170. Q = A_list[i]
  171. # use uniform distribution if there is no prior knowledge.
  172. nb_pd = len(A_list[i]) * len(A_list[j])
  173. pd_uni = 1 / nb_pd
  174. C = np.full((len(A_list[j]), len(A_list[i])), pd_uni)
  175. try:
  176. X = dlyap(A, Q, C)
  177. X = np.reshape(X, (-1, 1), order='F')
  178. # use uniform distribution if there is no prior knowledge.
  179. q_direct = np.full((1, nb_pd), pd_uni)
  180. Kmatrix[i][j] = np.dot(q_direct, X)
  181. except TypeError:
  182. # print('sth wrong.')
  183. Kmatrix[i][j] = np.nan
  184. Kmatrix[j][i] = Kmatrix[i][j]
  185. pbar.update(1)
  186. # A_list = []
  187. # for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
  188. # A_tilde = nx.adjacency_matrix(G, weight=None).todense()
  189. # # normalized adjacency matrices
  190. # # A_list.append(A_tilde / A_tilde.sum(axis=0))
  191. # A_list.append(A_tilde)
  192. return Kmatrix
  193. def _randomwalkkernel_conjugate(G1, G2, node_label, edge_label):
  194. """Calculate walk graph kernels up to n between 2 graphs using conjugate method.
  195. Parameters
  196. ----------
  197. G1, G2 : NetworkX graph
  198. Graphs between which the kernel is calculated.
  199. node_label : string
  200. node attribute used as label.
  201. edge_label : string
  202. edge attribute used as label.
  203. Return
  204. ------
  205. kernel : float
  206. Kernel between 2 graphs.
  207. """
  208. dpg = nx.tensor_product(G1, G2) # direct product graph
  209. import matplotlib.pyplot as plt
  210. nx.draw_networkx(G1)
  211. plt.show()
  212. nx.draw_networkx(G2)
  213. plt.show()
  214. nx.draw_networkx(dpg)
  215. plt.show()
  216. X = dlyap(A, Q, C)
  217. return kernel
  218. def _randomwalkkernel_fp(G1, G2, node_label, edge_label):
  219. """Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method.
  220. Parameters
  221. ----------
  222. G1, G2 : NetworkX graph
  223. Graphs between which the kernel is calculated.
  224. node_label : string
  225. node attribute used as label.
  226. edge_label : string
  227. edge attribute used as label.
  228. Return
  229. ------
  230. kernel : float
  231. Kernel between 2 graphs.
  232. """
  233. dpg = nx.tensor_product(G1, G2) # direct product graph
  234. X = dlyap(A, Q, C)
  235. return kernel
  236. def _randomwalkkernel_spectral(G1, G2, node_label, edge_label):
  237. """Calculate walk graph kernels up to n between 2 graphs using spectral decomposition method.
  238. Parameters
  239. ----------
  240. G1, G2 : NetworkX graph
  241. Graphs between which the kernel is calculated.
  242. node_label : string
  243. node attribute used as label.
  244. edge_label : string
  245. edge attribute used as label.
  246. Return
  247. ------
  248. kernel : float
  249. Kernel between 2 graphs.
  250. """
  251. dpg = nx.tensor_product(G1, G2) # direct product graph
  252. X = dlyap(A, Q, C)
  253. return kernel
  254. def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
  255. """Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.
  256. Parameters
  257. ----------
  258. G1, G2 : NetworkX graph
  259. Graphs between which the kernel is calculated.
  260. node_label : string
  261. node attribute used as label.
  262. edge_label : string
  263. edge attribute used as label.
  264. Return
  265. ------
  266. kernel : float
  267. Kernel between 2 graphs.
  268. """
  269. dpg = nx.tensor_product(G1, G2) # direct product graph
  270. X = dlyap(A, Q, C)
  271. return kernel

A Python package for graph kernels, graph edit distances and the graph pre-image problem.