
weisfeiler_lehman.py 22 kB

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 15:16:34 2020

@author: ljia

@references:

    [1] Shervashidze N, Schweitzer P, van Leeuwen EJ, Mehlhorn K, Borgwardt KM.
    Weisfeiler-Lehman graph kernels. Journal of Machine Learning Research.
    2011;12(Sep):2539-61.
"""

import numpy as np
import networkx as nx
from collections import Counter
# from functools import partial
from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import GraphKernel


class WeisfeilerLehman(GraphKernel):  # @todo: sp, edge and user kernels.
    def __init__(self, **kwargs):
        GraphKernel.__init__(self)
        self._node_labels = kwargs.get('node_labels', [])  # node attributes used as labels
        self._edge_labels = kwargs.get('edge_labels', [])  # edge attributes used as labels
        self._height = int(kwargs.get('height', 0))  # number of WL iterations
        self._base_kernel = kwargs.get('base_kernel', 'subtree')  # 'subtree', 'sp', 'edge' or user-defined
        self._ds_infos = kwargs.get('ds_infos', {})  # dataset meta-information
    def _compute_gm_series(self):
        # if self._verbose >= 2:
        #     import warnings
        #     warnings.warn('A part of the computation is parallelized.')
        self._add_dummy_node_labels(self._graphs)

        # for WL subtree kernel
        if self._base_kernel == 'subtree':
            gram_matrix = self._subtree_kernel_do(self._graphs)

        # for WL shortest path kernel
        elif self._base_kernel == 'sp':
            gram_matrix = self._sp_kernel_do(self._graphs)

        # for WL edge kernel
        elif self._base_kernel == 'edge':
            gram_matrix = self._edge_kernel_do(self._graphs)

        # for user defined base kernel
        else:
            gram_matrix = self._user_kernel_do(self._graphs)

        return gram_matrix
    def _compute_gm_imap_unordered(self):
        self._add_dummy_node_labels(self._graphs)

        if self._base_kernel == 'subtree':
            gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

            def init_worker(gn_toshare):
                global G_gn
                G_gn = gn_toshare

            do_fun = self._wrapper_pairwise
            parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
                        glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
            return gram_matrix
        else:
            if self._verbose >= 2:
                import warnings
                warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
            return self._compute_gm_series()
    def _compute_kernel_list_series(self, g1, g_list):  # @todo: this should be better.
        # if self._verbose >= 2:
        #     import warnings
        #     warnings.warn('A part of the computation is parallelized.')
        self._add_dummy_node_labels(g_list + [g1])

        # for WL subtree kernel
        if self._base_kernel == 'subtree':
            gram_matrix = self._subtree_kernel_do(g_list + [g1])

        # for WL shortest path kernel
        elif self._base_kernel == 'sp':
            gram_matrix = self._sp_kernel_do(g_list + [g1])

        # for WL edge kernel
        elif self._base_kernel == 'edge':
            gram_matrix = self._edge_kernel_do(g_list + [g1])

        # for user defined base kernel
        else:
            gram_matrix = self._user_kernel_do(g_list + [g1])

        return list(gram_matrix[-1][0:-1])
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        self._add_dummy_node_labels(g_list + [g1])

        if self._base_kernel == 'subtree':
            kernel_list = [None] * len(g_list)

            def init_worker(g1_toshare, g_list_toshare):
                global G_g1, G_g_list
                G_g1 = g1_toshare
                G_g_list = g_list_toshare

            do_fun = self._wrapper_kernel_list_do

            def func_assign(result, var_to_assign):
                var_to_assign[result[0]] = result[1]

            itr = range(len(g_list))
            len_itr = len(g_list)
            parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                        init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
                        n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
            return kernel_list
        else:
            if self._verbose >= 2:
                import warnings
                warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
            return self._compute_kernel_list_series(g1, g_list)
    def _wrapper_kernel_list_do(self, itr):
        # func_assign above expects an (index, value) pair.
        return itr, self.pairwise_kernel(G_g1, G_g_list[itr])
    def _compute_single_kernel_series(self, g1, g2):  # @todo: this should be better.
        self._add_dummy_node_labels([g1] + [g2])

        # for WL subtree kernel
        if self._base_kernel == 'subtree':
            gram_matrix = self._subtree_kernel_do([g1] + [g2])

        # for WL shortest path kernel
        elif self._base_kernel == 'sp':
            gram_matrix = self._sp_kernel_do([g1] + [g2])

        # for WL edge kernel
        elif self._base_kernel == 'edge':
            gram_matrix = self._edge_kernel_do([g1] + [g2])

        # for user defined base kernel
        else:
            gram_matrix = self._user_kernel_do([g1] + [g2])

        return gram_matrix[0][1]
    def pairwise_kernel(self, g1, g2):
        Gn = [g1, g2]
        kernel = 0

        # initialization for height = 0
        all_num_of_each_label = []  # number of occurrences of each label in each graph in this iteration

        # for each graph
        for G in Gn:
            # set all labels into a tuple.
            for nd, attrs in G.nodes(data=True):  # @todo: there may be a better way.
                G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
            # get the set of original labels
            labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
            # number of occurrences of each label in G
            all_num_of_each_label.append(dict(Counter(labels_ori)))

        # Compute the subtree kernel at the 0th iteration and add it to the final kernel.
        kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)

        # iterate over each height
        for h in range(1, self._height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of label sets that have occurred as node labels at least once in all graphs
            # all_labels_ori = set()  # all unique original labels in all graphs in this iteration
            all_num_of_each_label = []  # number of occurrences of each label in G

            # @todo: parallelize this part.
            for G in Gn:
                all_multisets = []
                for node, attrs in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor]['label_tuple'] for neighbor in G[node]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = [attrs['label_tuple']] + multiset  # add the prefix
                    all_multisets.append(tuple(multiset))

                # label compression
                set_unique = list(set(all_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # If a label occurred before, assign its former compressed label;
                # else assign the number of labels occurred + 1 as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed:
                        set_compressed[value] = all_set_compressed[value]
                    else:
                        set_compressed[value] = str(num_of_labels_occured + 1)
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes
                for idx, node in enumerate(G.nodes()):
                    G.nodes[node]['label_tuple'] = set_compressed[all_multisets[idx]]

                # get the set of compressed labels
                labels_comp = list(nx.get_node_attributes(G, 'label_tuple').values())
                # all_labels_ori.update(labels_comp)
                all_num_of_each_label.append(dict(Counter(labels_comp)))

            # Compute the subtree kernel at iteration h and add it to the final kernel.
            kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)

        return kernel
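    # For intuition (a schematic sketch, not part of the computation): with
    # height=1 and single string labels on a path graph a - b - a, the
    # multiset-labels built above are (a, b), (b, a, a) and (a, b);
    # compression then assigns, say, (a, b) -> '1' and (b, a, a) -> '2',
    # so the compressed label counts for the graph become {'1': 2, '2': 1}.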
    def _wrapper_pairwise(self, itr):
        i = itr[0]
        j = itr[1]
        return i, j, self.pairwise_kernel(G_gn[i], G_gn[j])
    def _compute_kernel_itr(self, kernel, all_num_of_each_label):
        labels = set(list(all_num_of_each_label[0].keys()) +
                     list(all_num_of_each_label[1].keys()))
        vector1 = np.array([(all_num_of_each_label[0][label]
                             if (label in all_num_of_each_label[0]) else 0)
                            for label in labels])
        vector2 = np.array([(all_num_of_each_label[1][label]
                             if (label in all_num_of_each_label[1]) else 0)
                            for label in labels])
        kernel += np.dot(vector1, vector2)
        return kernel
    def _subtree_kernel_do(self, Gn):
        """Compute Weisfeiler-Lehman kernels between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.

        Returns
        -------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between two graphs.
        """
        gram_matrix = np.zeros((len(Gn), len(Gn)))

        # initialization for height = 0
        all_num_of_each_label = []  # number of occurrences of each label in each graph in this iteration

        # for each graph
        for G in Gn:
            # set all labels into a tuple.
            for nd, attrs in G.nodes(data=True):  # @todo: there may be a better way.
                G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
            # get the set of original labels
            labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
            # number of occurrences of each label in G
            all_num_of_each_label.append(dict(Counter(labels_ori)))

        # Compute the subtree kernel at the 0th iteration and add it to the final kernel.
        self._compute_gram_itr(gram_matrix, all_num_of_each_label)

        # iterate over each height
        for h in range(1, self._height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of label sets that have occurred as node labels at least once in all graphs
            # all_labels_ori = set()  # all unique original labels in all graphs in this iteration
            all_num_of_each_label = []  # number of occurrences of each label in G

            # @todo: parallelize this part.
            for G in Gn:
                all_multisets = []
                for node, attrs in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor]['label_tuple'] for neighbor in G[node]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = [attrs['label_tuple']] + multiset  # add the prefix
                    all_multisets.append(tuple(multiset))

                # label compression
                set_unique = list(set(all_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # If a label occurred before, assign its former compressed label;
                # else assign the number of labels occurred + 1 as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed:
                        set_compressed[value] = all_set_compressed[value]
                    else:
                        set_compressed[value] = str(num_of_labels_occured + 1)
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes
                for idx, node in enumerate(G.nodes()):
                    G.nodes[node]['label_tuple'] = set_compressed[all_multisets[idx]]

                # get the set of compressed labels
                labels_comp = list(nx.get_node_attributes(G, 'label_tuple').values())
                # all_labels_ori.update(labels_comp)
                all_num_of_each_label.append(dict(Counter(labels_comp)))

            # Compute the subtree kernel at iteration h and add it to the final kernel.
            self._compute_gram_itr(gram_matrix, all_num_of_each_label)

        return gram_matrix
    def _compute_gram_itr(self, gram_matrix, all_num_of_each_label):
        """Compute the Gram matrix using the base kernel.
        """
        # if self._parallel == 'imap_unordered':
        #     # compute kernels.
        #     def init_worker(alllabels_toshare):
        #         global G_alllabels
        #         G_alllabels = alllabels_toshare
        #     do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix)
        #     parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker,
        #                 glbv=(all_num_of_each_label,), n_jobs=self._n_jobs, verbose=self._verbose)
        # elif self._parallel is None:
        for i in range(len(gram_matrix)):
            for j in range(i, len(gram_matrix)):
                gram_matrix[i][j] = self._compute_subtree_kernel(
                    all_num_of_each_label[i], all_num_of_each_label[j],
                    gram_matrix[i][j])
                gram_matrix[j][i] = gram_matrix[i][j]
    def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
        """Compute the subtree kernel.
        """
        labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
        vector1 = np.array([(num_of_each_label1[label]
                             if (label in num_of_each_label1) else 0)
                            for label in labels])
        vector2 = np.array([(num_of_each_label2[label]
                             if (label in num_of_each_label2) else 0)
                            for label in labels])
        kernel += np.dot(vector1, vector2)
        return kernel

    # def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
    #     i = itr[0]
    #     j = itr[1]
    #     return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
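    # For intuition (an illustrative count, not part of the computation): for
    # label counts {'1': 2, '2': 1} and {'1': 1, '3': 3}, the vectors over the
    # label set {'1', '2', '3'} are [2, 1, 0] and [1, 0, 3], so the dot
    # product above contributes 2*1 + 1*0 + 0*3 = 2 to the kernel.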
    def _wl_spkernel_do(self, Gn, node_label, edge_label, height):
        """Compute Weisfeiler-Lehman shortest path kernels between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.
        node_label : string
            Node attribute used as label.
        edge_label : string
            Edge attribute used as label.
        height : int
            Subtree height.

        Returns
        -------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between two graphs.
        """
        # @todo: legacy implementation; not yet wired to the base-kernel dispatch above.
        from gklearn.utils.utils import getSPGraph

        # init.
        height = int(height)
        gram_matrix = np.zeros((len(Gn), len(Gn)))  # init kernel
        Gn = [getSPGraph(G, edge_weight=edge_label) for G in Gn]  # get shortest path graphs of Gn

        # initialization for height = 0
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                for e1 in Gn[i].edges(data=True):
                    for e2 in Gn[j].edges(data=True):
                        if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                            gram_matrix[i][j] += 1
                gram_matrix[j][i] = gram_matrix[i][j]

        # iterate over each height
        for h in range(1, height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of label sets that have occurred as node labels at least once in all graphs
            for G in Gn:  # for each graph
                set_multisets = []
                for node in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor][node_label] for neighbor in G[node[0]]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = node[1][node_label] + ''.join(multiset)  # concatenate to a string and add the prefix
                    set_multisets.append(multiset)

                # label compression
                set_unique = list(set(set_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # If a label occurred before, assign its former compressed label; else assign the number of labels occurred + 1 as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed:
                        set_compressed[value] = all_set_compressed[value]
                    else:
                        set_compressed[value] = str(num_of_labels_occured + 1)
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes
                for idx, node in enumerate(G.nodes(data=True)):
                    node[1][node_label] = set_compressed[set_multisets[idx]]

            # Compute the subtree kernel at iteration h and add it to the final kernel
            for i in range(0, len(Gn)):
                for j in range(i, len(Gn)):
                    for e1 in Gn[i].edges(data=True):
                        for e2 in Gn[j].edges(data=True):
                            if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                                gram_matrix[i][j] += 1
                    gram_matrix[j][i] = gram_matrix[i][j]

        return gram_matrix
    def _wl_edgekernel_do(self, Gn, node_label, edge_label, height):
        """Compute Weisfeiler-Lehman edge kernels between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.
        node_label : string
            Node attribute used as label.
        edge_label : string
            Edge attribute used as label.
        height : int
            Subtree height.

        Returns
        -------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between two graphs.
        """
        # @todo: legacy implementation; not yet wired to the base-kernel dispatch above.
        # init.
        height = int(height)
        gram_matrix = np.zeros((len(Gn), len(Gn)))  # init kernel

        # initialization for height = 0
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                for e1 in Gn[i].edges(data=True):
                    for e2 in Gn[j].edges(data=True):
                        if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                            gram_matrix[i][j] += 1
                gram_matrix[j][i] = gram_matrix[i][j]

        # iterate over each height
        for h in range(1, height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of label sets that have occurred as node labels at least once in all graphs
            for G in Gn:  # for each graph
                set_multisets = []
                for node in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor][node_label] for neighbor in G[node[0]]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = node[1][node_label] + ''.join(multiset)  # concatenate to a string and add the prefix
                    set_multisets.append(multiset)

                # label compression
                set_unique = list(set(set_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # If a label occurred before, assign its former compressed label; else assign the number of labels occurred + 1 as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed:
                        set_compressed[value] = all_set_compressed[value]
                    else:
                        set_compressed[value] = str(num_of_labels_occured + 1)
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes
                for idx, node in enumerate(G.nodes(data=True)):
                    node[1][node_label] = set_compressed[set_multisets[idx]]

            # Compute the edge kernel at iteration h and add it to the final kernel
            for i in range(0, len(Gn)):
                for j in range(i, len(Gn)):
                    for e1 in Gn[i].edges(data=True):
                        for e2 in Gn[j].edges(data=True):
                            if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                                gram_matrix[i][j] += 1
                    gram_matrix[j][i] = gram_matrix[i][j]

        return gram_matrix
    def _wl_userkernel_do(self, Gn, node_label, edge_label, height, base_kernel):
        """Compute Weisfeiler-Lehman kernels based on a user-defined base kernel between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.
        node_label : string
            Node attribute used as label.
        edge_label : string
            Edge attribute used as label.
        height : int
            Subtree height.
        base_kernel : function
            The base kernel function used in each iteration of the WL kernel. It returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between two graphs.

        Returns
        -------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between two graphs.
        """
        # @todo: legacy implementation; not yet wired to the base-kernel dispatch above.
        # init.
        height = int(height)

        # initialization for height = 0
        gram_matrix = base_kernel(Gn, node_label, edge_label)

        # iterate over each height
        for h in range(1, height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of label sets that have occurred as node labels at least once in all graphs
            for G in Gn:  # for each graph
                set_multisets = []
                for node in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor][node_label] for neighbor in G[node[0]]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = node[1][node_label] + ''.join(multiset)  # concatenate to a string and add the prefix
                    set_multisets.append(multiset)

                # label compression
                set_unique = list(set(set_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # If a label occurred before, assign its former compressed label; else assign the number of labels occurred + 1 as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed:
                        set_compressed[value] = all_set_compressed[value]
                    else:
                        set_compressed[value] = str(num_of_labels_occured + 1)
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes
                for idx, node in enumerate(G.nodes(data=True)):
                    node[1][node_label] = set_compressed[set_multisets[idx]]

            # Compute the kernel at iteration h and add it to the final kernel
            gram_matrix += base_kernel(Gn, node_label, edge_label)

        return gram_matrix
    def _add_dummy_node_labels(self, Gn):
        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
            for i in range(len(Gn)):
                nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
            self._node_labels = [SpecialLabel.DUMMY]
class WLSubtree(WeisfeilerLehman):

    def __init__(self, **kwargs):
        kwargs['base_kernel'] = 'subtree'
        super().__init__(**kwargs)
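
A minimal usage sketch of the class above, assuming WeisfeilerLehman is importable from gklearn.kernels as the file's own imports suggest; the two toy graphs and the 'atom' label name are made up for illustration:

import networkx as nx
from gklearn.kernels import WeisfeilerLehman

# Two toy graphs whose nodes carry a label named 'atom' (hypothetical name).
g1 = nx.path_graph(3)
nx.set_node_attributes(g1, {0: 'C', 1: 'O', 2: 'C'}, 'atom')
g2 = nx.cycle_graph(3)
nx.set_node_attributes(g2, {0: 'C', 1: 'C', 2: 'O'}, 'atom')

# height=2 runs two WL relabeling iterations on top of the height-0
# comparison of the original labels.
wl = WeisfeilerLehman(node_labels=['atom'], height=2)
print(wl.pairwise_kernel(g1, g2))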

A Python package for graph kernels, graph edit distances and the graph pre-image problem.