
weisfeiler_lehman.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 15:16:34 2020

@author: ljia

@references:

    [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM.
    Weisfeiler-Lehman graph kernels. Journal of Machine Learning Research.
    2011;12(Sep):2539-61.
"""
import numpy as np
import networkx as nx
import sys
from collections import Counter
# from functools import partial
from itertools import combinations_with_replacement

from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import GraphKernel
from gklearn.utils.iters import get_iters

class WeisfeilerLehman(GraphKernel):  # @todo: sp, edge user kernel.

    def __init__(self, **kwargs):
        GraphKernel.__init__(self)
        self._node_labels = kwargs.get('node_labels', [])
        self._edge_labels = kwargs.get('edge_labels', [])
        self._height = int(kwargs.get('height', 0))
        self._base_kernel = kwargs.get('base_kernel', 'subtree')
        self._ds_infos = kwargs.get('ds_infos', {})

    def _compute_gm_series(self):
        # if self.verbose >= 2:
        #     import warnings
        #     warnings.warn('A part of the computation is parallelized.')
        self._add_dummy_node_labels(self._graphs)

        # for WL subtree kernel
        if self._base_kernel == 'subtree':
            gram_matrix = self._subtree_kernel_do(self._graphs)
        # for WL shortest path kernel
        elif self._base_kernel == 'sp':
            gram_matrix = self._sp_kernel_do(self._graphs)
        # for WL edge kernel
        elif self._base_kernel == 'edge':
            gram_matrix = self._edge_kernel_do(self._graphs)
        # for user defined base kernel
        else:
            gram_matrix = self._user_kernel_do(self._graphs)

        return gram_matrix

    def _compute_gm_imap_unordered(self):
        self._add_dummy_node_labels(self._graphs)

        if self._base_kernel == 'subtree':
            gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

            # for i in range(len(self._graphs)):
            #     for j in range(i, len(self._graphs)):
            #         gram_matrix[i][j] = self.pairwise_kernel(self._graphs[i], self._graphs[j])
            #         gram_matrix[j][i] = gram_matrix[i][j]

            def init_worker(gn_toshare):
                global G_gn
                G_gn = gn_toshare

            do_fun = self._wrapper_pairwise
            parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
                        glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
            return gram_matrix
        else:
            if self.verbose >= 2:
                import warnings
                warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
            return self._compute_gm_series()

    def _compute_kernel_list_series(self, g1, g_list):  # @todo: this should be better.
        # if self.verbose >= 2:
        #     import warnings
        #     warnings.warn('A part of the computation is parallelized.')
        self._add_dummy_node_labels(g_list + [g1])

        # for WL subtree kernel
        if self._base_kernel == 'subtree':
            gram_matrix = self._subtree_kernel_do(g_list + [g1])
        # for WL shortest path kernel
        elif self._base_kernel == 'sp':
            gram_matrix = self._sp_kernel_do(g_list + [g1])
        # for WL edge kernel
        elif self._base_kernel == 'edge':
            gram_matrix = self._edge_kernel_do(g_list + [g1])
        # for user defined base kernel
        else:
            gram_matrix = self._user_kernel_do(g_list + [g1])

        return list(gram_matrix[-1][0:-1])

    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        self._add_dummy_node_labels(g_list + [g1])

        if self._base_kernel == 'subtree':
            kernel_list = [None] * len(g_list)

            def init_worker(g1_toshare, g_list_toshare):
                global G_g1, G_g_list
                G_g1 = g1_toshare
                G_g_list = g_list_toshare

            do_fun = self._wrapper_kernel_list_do

            def func_assign(result, var_to_assign):
                var_to_assign[result[0]] = result[1]

            itr = range(len(g_list))
            len_itr = len(g_list)
            parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                        init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
                        n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
            return kernel_list
        else:
            if self.verbose >= 2:
                import warnings
                warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
            return self._compute_kernel_list_series(g1, g_list)

    def _wrapper_kernel_list_do(self, itr):
        return itr, self.pairwise_kernel(G_g1, G_g_list[itr])

    def _compute_single_kernel_series(self, g1, g2):  # @todo: this should be better.
        self._add_dummy_node_labels([g1] + [g2])

        # for WL subtree kernel
        if self._base_kernel == 'subtree':
            gram_matrix = self._subtree_kernel_do([g1] + [g2])
        # for WL shortest path kernel
        elif self._base_kernel == 'sp':
            gram_matrix = self._sp_kernel_do([g1] + [g2])
        # for WL edge kernel
        elif self._base_kernel == 'edge':
            gram_matrix = self._edge_kernel_do([g1] + [g2])
        # for user defined base kernel
        else:
            gram_matrix = self._user_kernel_do([g1] + [g2])

        return gram_matrix[0][1]

    def pairwise_kernel(self, g1, g2):
        Gn = [g1.copy(), g2.copy()]  # @todo: make sure it is a full deep copy. and faster!
        kernel = 0

        # initial for height = 0
        all_num_of_each_label = []  # number of occurrences of each label in each graph in this iteration
        # for each graph
        for G in Gn:
            # set all labels into a tuple.
            for nd, attrs in G.nodes(data=True):  # @todo: there may be a better way.
                G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
            # get the set of original labels
            labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
            # number of occurrences of each label in G
            all_num_of_each_label.append(dict(Counter(labels_ori)))

        # Compute the subtree kernel for the 0th iteration and add it to the final kernel.
        kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)

        # iterate over heights
        for h in range(1, self._height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of distinct compressed labels assigned so far across all graphs
            # all_labels_ori = set()  # all unique original labels in all graphs in this iteration
            all_num_of_each_label = []  # number of occurrences of each label in G

            # @todo: parallelize this part.
            for G in Gn:
                all_multisets = []
                for node, attrs in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor]['label_tuple'] for neighbor in G[node]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = [attrs['label_tuple']] + multiset  # add the prefix
                    all_multisets.append(tuple(multiset))

                # label compression
                set_unique = list(set(all_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # If a label occurred before, assign its former compressed label;
                # else assign (number of labels occurred + 1) as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed.keys():
                        set_compressed[value] = all_set_compressed[value]
                    else:
                        set_compressed[value] = str(num_of_labels_occured + 1)
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes
                for idx, node in enumerate(G.nodes()):
                    G.nodes[node]['label_tuple'] = set_compressed[all_multisets[idx]]

                # get the set of compressed labels
                labels_comp = list(nx.get_node_attributes(G, 'label_tuple').values())
                # all_labels_ori.update(labels_comp)
                all_num_of_each_label.append(dict(Counter(labels_comp)))

            # Compute the subtree kernel for the h-th iteration and add it to the final kernel.
            kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)

        return kernel
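
    # Illustrative trace (added for clarity; not in the original code): one WL
    # iteration on a toy path graph a-b-a with a single node-label name. The
    # initial label tuples are ('a',), ('b',), ('a',). Multiset-label
    # determination yields (('a',), ('b',)) for each end node and
    # (('b',), ('a',), ('a',)) for the middle node; label compression maps the
    # two unique multisets to fresh labels, say '1' and '2', so the graph
    # enters the next iteration with node labels '1', '2', '1'.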

    def _wrapper_pairwise(self, itr):
        i = itr[0]
        j = itr[1]
        return i, j, self.pairwise_kernel(G_gn[i], G_gn[j])

    def _compute_kernel_itr(self, kernel, all_num_of_each_label):
        labels = set(list(all_num_of_each_label[0].keys()) +
                     list(all_num_of_each_label[1].keys()))
        vector1 = np.array([(all_num_of_each_label[0][label]
                             if (label in all_num_of_each_label[0].keys()) else 0)
                            for label in labels])
        vector2 = np.array([(all_num_of_each_label[1][label]
                             if (label in all_num_of_each_label[1].keys()) else 0)
                            for label in labels])
        kernel += np.dot(vector1, vector2)
        return kernel
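
    # Worked example (added for clarity; not in the original code): with label
    # counts {'1': 2, '2': 1} for the first graph and {'1': 1, '3': 2} for the
    # second, the label set is {'1', '2', '3'}, the count vectors (in a
    # matching label order) are [2, 1, 0] and [1, 0, 2], and this iteration
    # contributes 2*1 + 1*0 + 0*2 = 2 to the accumulated kernel value.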

    def _subtree_kernel_do(self, Gn):
        """Compute Weisfeiler-Lehman subtree kernels between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.

        Returns
        -------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
        """
        gram_matrix = np.zeros((len(Gn), len(Gn)))

        # initial for height = 0
        all_num_of_each_label = []  # number of occurrences of each label in each graph in this iteration
        # for each graph
        if self.verbose >= 2:
            iterator = get_iters(Gn, desc='Setting all labels into a tuple')
        else:
            iterator = Gn
        for G in iterator:
            # set all labels into a tuple.
            for nd, attrs in G.nodes(data=True):  # @todo: there may be a better way.
                G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
            # get the set of original labels
            labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
            # number of occurrences of each label in G
            all_num_of_each_label.append(dict(Counter(labels_ori)))

        # Compute the subtree kernel for the 0th iteration and add it to the final kernel.
        self._compute_gram_itr(gram_matrix, all_num_of_each_label)

        # iterate over heights
        for h in range(1, self._height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of distinct compressed labels assigned so far across all graphs
            # all_labels_ori = set()  # all unique original labels in all graphs in this iteration
            all_num_of_each_label = []  # number of occurrences of each label in G

            # @todo: parallelize this part.
            # if self.verbose >= 2:
            #     iterator = get_iters(Gn, desc='Going through iteration ' + str(h), length=len(Gn))
            # else:
            #     iterator = Gn
            for G in Gn:
                all_multisets = []
                for node, attrs in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor]['label_tuple'] for neighbor in G[node]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = [attrs['label_tuple']] + multiset  # add the prefix
                    all_multisets.append(tuple(multiset))

                # label compression
                set_unique = list(set(all_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # If a label occurred before, assign its former compressed label;
                # else assign (number of labels occurred + 1) as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed.keys():
                        set_compressed[value] = all_set_compressed[value]
                    else:
                        set_compressed[value] = str(num_of_labels_occured + 1)
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes
                for idx, node in enumerate(G.nodes()):
                    G.nodes[node]['label_tuple'] = set_compressed[all_multisets[idx]]

                # get the set of compressed labels
                labels_comp = list(nx.get_node_attributes(G, 'label_tuple').values())
                # all_labels_ori.update(labels_comp)
                all_num_of_each_label.append(dict(Counter(labels_comp)))

            # Compute the subtree kernel for the h-th iteration and add it to the final kernel.
            self._compute_gram_itr(gram_matrix, all_num_of_each_label)

        return gram_matrix

    def _compute_gram_itr(self, gram_matrix, all_num_of_each_label):
        """Compute the Gram matrix using the base kernel.
        """
        # if self.parallel == 'imap_unordered':
        #     # compute kernels.
        #     def init_worker(alllabels_toshare):
        #         global G_alllabels
        #         G_alllabels = alllabels_toshare
        #     do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix)
        #     parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker,
        #                 glbv=(all_num_of_each_label,), n_jobs=self.n_jobs, verbose=self.verbose)
        # elif self.parallel is None:
        itr = combinations_with_replacement(range(0, len(gram_matrix)), 2)
        len_itr = int(len(gram_matrix) * (len(gram_matrix) + 1) / 2)
        iterator = get_iters(itr, desc='Computing Gram matrix for this iteration',
                             file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))
        for i, j in iterator:
            gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
                                                             all_num_of_each_label[j],
                                                             gram_matrix[i][j])
            gram_matrix[j][i] = gram_matrix[i][j]

    def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
        """Compute the subtree kernel.
        """
        labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
        vector1 = np.array([(num_of_each_label1[label]
                             if (label in num_of_each_label1.keys()) else 0)
                            for label in labels])
        vector2 = np.array([(num_of_each_label2[label]
                             if (label in num_of_each_label2.keys()) else 0)
                            for label in labels])
        kernel += np.dot(vector1, vector2)
        return kernel

    # def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
    #     i = itr[0]
    #     j = itr[1]
    #     return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])

    @staticmethod
    def _wl_spkernel_do(Gn, node_label, edge_label, height):
        """Compute Weisfeiler-Lehman shortest path kernels between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.
        node_label : string
            Node attribute used as label.
        edge_label : string
            Edge attribute used as label.
        height : int
            Subtree height.

        Returns
        -------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
        """
        # @todo: legacy helper, not yet wired into the class dispatch above
        # (self._sp_kernel_do); see the class-level @todo.
        from gklearn.utils.utils import getSPGraph

        # init.
        height = int(height)
        gram_matrix = np.zeros((len(Gn), len(Gn)))  # init kernel
        Gn = [getSPGraph(G, edge_weight=edge_label) for G in Gn]  # get shortest path graphs of Gn

        # initial for height = 0
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                for e1 in Gn[i].edges(data=True):
                    for e2 in Gn[j].edges(data=True):
                        if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and \
                           ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                            gram_matrix[i][j] += 1
                gram_matrix[j][i] = gram_matrix[i][j]

        # iterate over heights
        for h in range(1, height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of distinct compressed labels assigned so far across all graphs
            for G in Gn:  # for each graph
                set_multisets = []
                for node, attrs in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor][node_label] for neighbor in G[node]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = attrs[node_label] + ''.join(multiset)  # concatenate to a string and add the prefix
                    set_multisets.append(multiset)

                # label compression
                set_unique = list(set(set_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # If a label occurred before, assign its former compressed label;
                # else assign (number of labels occurred + 1) as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed.keys():
                        set_compressed[value] = all_set_compressed[value]
                    else:
                        set_compressed[value] = str(num_of_labels_occured + 1)
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes, indexing multisets by iteration order (the original
                # indexed set_multisets by node id, which breaks for non-integer ids)
                for idx, (node, attrs) in enumerate(G.nodes(data=True)):
                    attrs[node_label] = set_compressed[set_multisets[idx]]

            # Compute the subtree kernel at height h and add it to the final kernel.
            for i in range(0, len(Gn)):
                for j in range(i, len(Gn)):
                    for e1 in Gn[i].edges(data=True):
                        for e2 in Gn[j].edges(data=True):
                            if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and \
                               ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                                gram_matrix[i][j] += 1
                    gram_matrix[j][i] = gram_matrix[i][j]

        return gram_matrix

    @staticmethod
    def _wl_edgekernel_do(Gn, node_label, edge_label, height):
        """Compute Weisfeiler-Lehman edge kernels between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.
        node_label : string
            Node attribute used as label.
        edge_label : string
            Edge attribute used as label.
        height : int
            Subtree height.

        Returns
        -------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
        """
        # @todo: legacy helper, not yet wired into the class dispatch above
        # (self._edge_kernel_do); see the class-level @todo.

        # init.
        height = int(height)
        gram_matrix = np.zeros((len(Gn), len(Gn)))  # init kernel

        # initial for height = 0
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                for e1 in Gn[i].edges(data=True):
                    for e2 in Gn[j].edges(data=True):
                        if e1[2][edge_label] == e2[2][edge_label] and \
                           ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                            gram_matrix[i][j] += 1
                gram_matrix[j][i] = gram_matrix[i][j]

        # iterate over heights
        for h in range(1, height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of distinct compressed labels assigned so far across all graphs
            for G in Gn:  # for each graph
                set_multisets = []
                for node, attrs in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor][node_label] for neighbor in G[node]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = attrs[node_label] + ''.join(multiset)  # concatenate to a string and add the prefix
                    set_multisets.append(multiset)

                # label compression
                set_unique = list(set(set_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # If a label occurred before, assign its former compressed label;
                # else assign (number of labels occurred + 1) as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed.keys():
                        set_compressed[value] = all_set_compressed[value]
                    else:
                        set_compressed[value] = str(num_of_labels_occured + 1)
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes, indexing multisets by iteration order (the original
                # indexed set_multisets by node id, which breaks for non-integer ids)
                for idx, (node, attrs) in enumerate(G.nodes(data=True)):
                    attrs[node_label] = set_compressed[set_multisets[idx]]

            # Compute the subtree kernel at height h and add it to the final kernel.
            for i in range(0, len(Gn)):
                for j in range(i, len(Gn)):
                    for e1 in Gn[i].edges(data=True):
                        for e2 in Gn[j].edges(data=True):
                            if e1[2][edge_label] == e2[2][edge_label] and \
                               ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                                gram_matrix[i][j] += 1
                    gram_matrix[j][i] = gram_matrix[i][j]

        return gram_matrix

    @staticmethod
    def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
        """Compute Weisfeiler-Lehman kernels based on a user-defined base kernel between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.
        node_label : string
            Node attribute used as label.
        edge_label : string
            Edge attribute used as label.
        height : int
            Subtree height.
        base_kernel : function
            The base kernel function applied in each iteration of the WL kernel. It returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 graphs.

        Returns
        -------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
        """
        # @todo: legacy helper, not yet wired into the class dispatch above
        # (self._user_kernel_do); see the class-level @todo.

        # init.
        height = int(height)

        # initial for height = 0
        gram_matrix = base_kernel(Gn, node_label, edge_label)

        # iterate over heights
        for h in range(1, height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of distinct compressed labels assigned so far across all graphs
            for G in Gn:  # for each graph
                set_multisets = []
                for node, attrs in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor][node_label] for neighbor in G[node]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = attrs[node_label] + ''.join(multiset)  # concatenate to a string and add the prefix
                    set_multisets.append(multiset)

                # label compression
                set_unique = list(set(set_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # If a label occurred before, assign its former compressed label;
                # else assign (number of labels occurred + 1) as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed.keys():
                        set_compressed[value] = all_set_compressed[value]
                    else:
                        set_compressed[value] = str(num_of_labels_occured + 1)
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes, indexing multisets by iteration order (the original
                # indexed set_multisets by node id, which breaks for non-integer ids)
                for idx, (node, attrs) in enumerate(G.nodes(data=True)):
                    attrs[node_label] = set_compressed[set_multisets[idx]]

            # Compute the kernel at height h and add it to the final kernel.
            gram_matrix += base_kernel(Gn, node_label, edge_label)

        return gram_matrix
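
    # Illustrative sketch of a user-defined base kernel for _wl_userkernel_do
    # (added here; the name and implementation are hypothetical, not part of
    # gklearn). Any callable taking (Gn, node_label, edge_label) and returning
    # a len(Gn) x len(Gn) Numpy matrix fits the interface used above, e.g. a
    # node-label delta kernel, which counts pairs of nodes with equal labels:
    #
    #     def node_label_delta_kernel(Gn, node_label, edge_label):
    #         gram = np.zeros((len(Gn), len(Gn)))
    #         for i in range(len(Gn)):
    #             for j in range(i, len(Gn)):
    #                 li = list(nx.get_node_attributes(Gn[i], node_label).values())
    #                 lj = list(nx.get_node_attributes(Gn[j], node_label).values())
    #                 gram[i][j] = sum(u == v for u in li for v in lj)
    #                 gram[j][i] = gram[i][j]
    #         return gram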

    def _add_dummy_node_labels(self, Gn):
        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
            for i in range(len(Gn)):
                nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
            self._node_labels = [SpecialLabel.DUMMY]


class WLSubtree(WeisfeilerLehman):

    def __init__(self, **kwargs):
        kwargs['base_kernel'] = 'subtree'
        super().__init__(**kwargs)
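
# Usage sketch (added for illustration; not part of the original file). The
# public Gram-matrix API of GraphKernel varies across gklearn versions, so
# this calls pairwise_kernel directly on two toy labeled graphs.
if __name__ == '__main__':
    # Two small graphs with a node attribute 'atom' used as the WL label.
    g1 = nx.path_graph(3)
    nx.set_node_attributes(g1, {0: 'C', 1: 'O', 2: 'C'}, 'atom')
    g2 = nx.cycle_graph(3)
    nx.set_node_attributes(g2, {0: 'C', 1: 'C', 2: 'O'}, 'atom')

    # WL subtree kernel with 2 refinement iterations over the 'atom' labels.
    kernel = WLSubtree(node_labels=['atom'], height=2)
    print(kernel.pairwise_kernel(g1, g2))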

A Python package for graph kernels, graph edit distances and the graph pre-image problem.