
weisfeiler_lehman.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 15:16:34 2020

@author: ljia

@references:

    [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM.
    Weisfeiler-Lehman graph kernels. Journal of Machine Learning Research.
    2011;12(Sep):2539-61.
"""
import numpy as np
import networkx as nx
from collections import Counter
# from functools import partial
from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import GraphKernel


class WeisfeilerLehman(GraphKernel):  # @todo: sp, edge user kernel.

    def __init__(self, **kwargs):
        GraphKernel.__init__(self)
        self._node_labels = kwargs.get('node_labels', [])
        self._edge_labels = kwargs.get('edge_labels', [])
        self._height = int(kwargs.get('height', 0))
        self._base_kernel = kwargs.get('base_kernel', 'subtree')
        self._ds_infos = kwargs.get('ds_infos', {})
    def _compute_gm_series(self):
        # if self._verbose >= 2:
        #     import warnings
        #     warnings.warn('A part of the computation is parallelized.')

        self._add_dummy_node_labels(self._graphs)

        # for WL subtree kernel
        if self._base_kernel == 'subtree':
            gram_matrix = self._subtree_kernel_do(self._graphs)

        # for WL shortest path kernel
        elif self._base_kernel == 'sp':
            gram_matrix = self._sp_kernel_do(self._graphs)

        # for WL edge kernel
        elif self._base_kernel == 'edge':
            gram_matrix = self._edge_kernel_do(self._graphs)

        # for user defined base kernel
        else:
            gram_matrix = self._user_kernel_do(self._graphs)

        return gram_matrix
    def _compute_gm_imap_unordered(self):
        self._add_dummy_node_labels(self._graphs)

        if self._base_kernel == 'subtree':
            gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

            # for i in range(len(self._graphs)):
            #     for j in range(i, len(self._graphs)):
            #         gram_matrix[i][j] = self.pairwise_kernel(self._graphs[i], self._graphs[j])
            #         gram_matrix[j][i] = gram_matrix[i][j]

            def init_worker(gn_toshare):
                global G_gn
                G_gn = gn_toshare

            do_fun = self._wrapper_pairwise
            parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
                        glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
            return gram_matrix
        else:
            if self._verbose >= 2:
                import warnings
                warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
            return self._compute_gm_series()
    def _compute_kernel_list_series(self, g1, g_list):  # @todo: this should be better.
        # if self._verbose >= 2:
        #     import warnings
        #     warnings.warn('A part of the computation is parallelized.')

        self._add_dummy_node_labels(g_list + [g1])

        # for WL subtree kernel
        if self._base_kernel == 'subtree':
            gram_matrix = self._subtree_kernel_do(g_list + [g1])

        # for WL shortest path kernel
        elif self._base_kernel == 'sp':
            gram_matrix = self._sp_kernel_do(g_list + [g1])

        # for WL edge kernel
        elif self._base_kernel == 'edge':
            gram_matrix = self._edge_kernel_do(g_list + [g1])

        # for user defined base kernel
        else:
            gram_matrix = self._user_kernel_do(g_list + [g1])

        # g1 is the last graph, so its kernels with g_list form the last row.
        return list(gram_matrix[-1][0:-1])

    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        self._add_dummy_node_labels(g_list + [g1])

        if self._base_kernel == 'subtree':
            kernel_list = [None] * len(g_list)

            def init_worker(g1_toshare, g_list_toshare):
                global G_g1, G_g_list
                G_g1 = g1_toshare
                G_g_list = g_list_toshare

            do_fun = self._wrapper_kernel_list_do

            def func_assign(result, var_to_assign):
                var_to_assign[result[0]] = result[1]

            itr = range(len(g_list))
            len_itr = len(g_list)
            parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                        init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
                        n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
            return kernel_list
        else:
            if self._verbose >= 2:
                import warnings
                warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
            return self._compute_kernel_list_series(g1, g_list)

    def _wrapper_kernel_list_do(self, itr):
        return itr, self.pairwise_kernel(G_g1, G_g_list[itr])
    def _compute_single_kernel_series(self, g1, g2):  # @todo: this should be better.
        self._add_dummy_node_labels([g1] + [g2])

        # for WL subtree kernel
        if self._base_kernel == 'subtree':
            gram_matrix = self._subtree_kernel_do([g1] + [g2])

        # for WL shortest path kernel
        elif self._base_kernel == 'sp':
            gram_matrix = self._sp_kernel_do([g1] + [g2])

        # for WL edge kernel
        elif self._base_kernel == 'edge':
            gram_matrix = self._edge_kernel_do([g1] + [g2])

        # for user defined base kernel
        else:
            gram_matrix = self._user_kernel_do([g1] + [g2])

        # the kernel between g1 and g2 is the off-diagonal entry of the 2x2 matrix.
        return gram_matrix[0][1]
    def pairwise_kernel(self, g1, g2):
        Gn = [g1.copy(), g2.copy()]  # @todo: make sure it is a full deep copy. and faster!
        kernel = 0

        # initial for height = 0
        all_num_of_each_label = []  # number of occurrences of each label in each graph in this iteration
        # for each graph
        for G in Gn:
            # set all labels into a tuple.
            for nd, attrs in G.nodes(data=True):  # @todo: there may be a better way.
                G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
            # get the set of original labels
            labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
            # number of occurrences of each label in G
            all_num_of_each_label.append(dict(Counter(labels_ori)))

        # Compute subtree kernel with the 0th iteration and add it to the final kernel.
        kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)

        # iterate each height
        for h in range(1, self._height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of distinct compressed labels seen so far in all graphs
            # all_labels_ori = set()  # all unique original labels in all graphs in this iteration
            all_num_of_each_label = []  # number of occurrences of each label in G

            # @todo: parallel this part.
            for G in Gn:
                all_multisets = []
                for node, attrs in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor]['label_tuple'] for neighbor in G[node]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = [attrs['label_tuple']] + multiset  # add the prefix
                    all_multisets.append(tuple(multiset))

                # label compression
                set_unique = list(set(all_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # if a label occurred before, assign its former compressed label;
                # else assign the number of labels occurred + 1 as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed.keys():
                        set_compressed.update({value: all_set_compressed[value]})
                    else:
                        set_compressed.update({value: str(num_of_labels_occured + 1)})
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes
                for idx, node in enumerate(G.nodes()):
                    G.nodes[node]['label_tuple'] = set_compressed[all_multisets[idx]]

                # get the set of compressed labels
                labels_comp = list(nx.get_node_attributes(G, 'label_tuple').values())
                # all_labels_ori.update(labels_comp)
                all_num_of_each_label.append(dict(Counter(labels_comp)))

            # Compute subtree kernel with h iterations and add it to the final kernel
            kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)

        return kernel
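    # An illustrative trace of one relabeling step in pairwise_kernel: for a
    # path graph x - y - z with label tuples ('a',), ('b',), ('a',), the
    # height-1 multisets are (('a',), ('b',)) for x and z and
    # (('b',), ('a',), ('a',)) for y. After compression, x and z share one new
    # label (say '1') and y gets another (say '2'), so this iteration's label
    # histogram is {'1': 2, '2': 1}.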
    def _wrapper_pairwise(self, itr):
        i = itr[0]
        j = itr[1]
        return i, j, self.pairwise_kernel(G_gn[i], G_gn[j])

    def _compute_kernel_itr(self, kernel, all_num_of_each_label):
        labels = set(list(all_num_of_each_label[0].keys()) +
                     list(all_num_of_each_label[1].keys()))
        vector1 = np.array([(all_num_of_each_label[0][label]
                             if (label in all_num_of_each_label[0].keys()) else 0)
                            for label in labels])
        vector2 = np.array([(all_num_of_each_label[1][label]
                             if (label in all_num_of_each_label[1].keys()) else 0)
                            for label in labels])
        kernel += np.dot(vector1, vector2)
        return kernel
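    # Example of one iteration's contribution in _compute_kernel_itr
    # (illustrative): with histograms {'1': 2, '2': 1} and {'1': 1, '2': 3},
    # the common label set is {'1', '2'}, the count vectors are [2, 1] and
    # [1, 3], and np.dot adds 2*1 + 1*3 = 5 to the kernel.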
    def _subtree_kernel_do(self, Gn):
        """Compute Weisfeiler-Lehman kernels between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.

        Return
        ------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
        """
        gram_matrix = np.zeros((len(Gn), len(Gn)))

        # initial for height = 0
        all_num_of_each_label = []  # number of occurrences of each label in each graph in this iteration
        # for each graph
        for G in Gn:
            # set all labels into a tuple.
            for nd, attrs in G.nodes(data=True):  # @todo: there may be a better way.
                G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
            # get the set of original labels
            labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
            # number of occurrences of each label in G
            all_num_of_each_label.append(dict(Counter(labels_ori)))

        # Compute subtree kernel with the 0th iteration and add it to the final kernel.
        self._compute_gram_itr(gram_matrix, all_num_of_each_label)

        # iterate each height
        for h in range(1, self._height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of distinct compressed labels seen so far in all graphs
            # all_labels_ori = set()  # all unique original labels in all graphs in this iteration
            all_num_of_each_label = []  # number of occurrences of each label in G

            # @todo: parallel this part.
            for G in Gn:
                all_multisets = []
                for node, attrs in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor]['label_tuple'] for neighbor in G[node]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = [attrs['label_tuple']] + multiset  # add the prefix
                    all_multisets.append(tuple(multiset))

                # label compression
                set_unique = list(set(all_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # if a label occurred before, assign its former compressed label;
                # else assign the number of labels occurred + 1 as the compressed label.
                for value in set_unique:
                    if value in all_set_compressed.keys():
                        set_compressed.update({value: all_set_compressed[value]})
                    else:
                        set_compressed.update({value: str(num_of_labels_occured + 1)})
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes
                for idx, node in enumerate(G.nodes()):
                    G.nodes[node]['label_tuple'] = set_compressed[all_multisets[idx]]

                # get the set of compressed labels
                labels_comp = list(nx.get_node_attributes(G, 'label_tuple').values())
                # all_labels_ori.update(labels_comp)
                all_num_of_each_label.append(dict(Counter(labels_comp)))

            # Compute subtree kernel with h iterations and add it to the final kernel
            self._compute_gram_itr(gram_matrix, all_num_of_each_label)

        return gram_matrix
    def _compute_gram_itr(self, gram_matrix, all_num_of_each_label):
        """Compute Gram matrix using the base kernel.
        """
        # if self._parallel == 'imap_unordered':
        #     # compute kernels.
        #     def init_worker(alllabels_toshare):
        #         global G_alllabels
        #         G_alllabels = alllabels_toshare
        #     do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix)
        #     parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker,
        #                 glbv=(all_num_of_each_label,), n_jobs=self._n_jobs, verbose=self._verbose)
        # elif self._parallel is None:
        for i in range(len(gram_matrix)):
            for j in range(i, len(gram_matrix)):
                gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
                                                                 all_num_of_each_label[j], gram_matrix[i][j])
                gram_matrix[j][i] = gram_matrix[i][j]

    def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
        """Compute the subtree kernel.
        """
        labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
        vector1 = np.array([(num_of_each_label1[label]
                             if (label in num_of_each_label1.keys()) else 0)
                            for label in labels])
        vector2 = np.array([(num_of_each_label2[label]
                             if (label in num_of_each_label2.keys()) else 0)
                            for label in labels])
        kernel += np.dot(vector1, vector2)
        return kernel

    # def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
    #     i = itr[0]
    #     j = itr[1]
    #     return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
    def _wl_spkernel_do(self, Gn, node_label, edge_label, height):
        """Compute Weisfeiler-Lehman shortest path kernels between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.
        node_label : string
            Node attribute used as label.
        edge_label : string
            Edge attribute used as label.
        height : int
            Subtree height.

        Return
        ------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
        """
        from gklearn.utils.utils import getSPGraph

        # init.
        height = int(height)
        gram_matrix = np.zeros((len(Gn), len(Gn)))  # init kernel
        Gn = [getSPGraph(G, edge_weight=edge_label) for G in Gn]  # get shortest path graphs of Gn

        # initial for height = 0
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                for e1 in Gn[i].edges(data=True):
                    for e2 in Gn[j].edges(data=True):
                        if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                            gram_matrix[i][j] += 1
                gram_matrix[j][i] = gram_matrix[i][j]

        # iterate each height
        for h in range(1, height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of distinct compressed labels seen so far in all graphs
            for G in Gn:  # for each graph
                set_multisets = []
                for node in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor][node_label] for neighbor in G[node[0]]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = node[1][node_label] + ''.join(multiset)  # concatenate to a string and add the prefix
                    set_multisets.append(multiset)

                # label compression
                set_unique = list(set(set_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # if a label occurred before, assign its former compressed label, else assign the number of labels occurred + 1 as the compressed label
                for value in set_unique:
                    if value in all_set_compressed.keys():
                        set_compressed.update({value: all_set_compressed[value]})
                    else:
                        set_compressed.update({value: str(num_of_labels_occured + 1)})
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes (set_multisets is in node iteration order, so index by position)
                for idx, node in enumerate(G.nodes(data=True)):
                    node[1][node_label] = set_compressed[set_multisets[idx]]

            # Compute subtree kernel with h iterations and add it to the final kernel
            for i in range(0, len(Gn)):
                for j in range(i, len(Gn)):
                    for e1 in Gn[i].edges(data=True):
                        for e2 in Gn[j].edges(data=True):
                            if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                                gram_matrix[i][j] += 1
                    gram_matrix[j][i] = gram_matrix[i][j]

        return gram_matrix
    def _wl_edgekernel_do(self, Gn, node_label, edge_label, height):
        """Compute Weisfeiler-Lehman edge kernels between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.
        node_label : string
            Node attribute used as label.
        edge_label : string
            Edge attribute used as label.
        height : int
            Subtree height.

        Return
        ------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
        """
        # init.
        height = int(height)
        gram_matrix = np.zeros((len(Gn), len(Gn)))  # init kernel

        # initial for height = 0
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                for e1 in Gn[i].edges(data=True):
                    for e2 in Gn[j].edges(data=True):
                        if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                            gram_matrix[i][j] += 1
                gram_matrix[j][i] = gram_matrix[i][j]

        # iterate each height
        for h in range(1, height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of distinct compressed labels seen so far in all graphs
            for G in Gn:  # for each graph
                set_multisets = []
                for node in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor][node_label] for neighbor in G[node[0]]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = node[1][node_label] + ''.join(multiset)  # concatenate to a string and add the prefix
                    set_multisets.append(multiset)

                # label compression
                set_unique = list(set(set_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # if a label occurred before, assign its former compressed label, else assign the number of labels occurred + 1 as the compressed label
                for value in set_unique:
                    if value in all_set_compressed.keys():
                        set_compressed.update({value: all_set_compressed[value]})
                    else:
                        set_compressed.update({value: str(num_of_labels_occured + 1)})
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes (set_multisets is in node iteration order, so index by position)
                for idx, node in enumerate(G.nodes(data=True)):
                    node[1][node_label] = set_compressed[set_multisets[idx]]

            # Compute subtree kernel with h iterations and add it to the final kernel
            for i in range(0, len(Gn)):
                for j in range(i, len(Gn)):
                    for e1 in Gn[i].edges(data=True):
                        for e2 in Gn[j].edges(data=True):
                            if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                                gram_matrix[i][j] += 1
                    gram_matrix[j][i] = gram_matrix[i][j]

        return gram_matrix
    def _wl_userkernel_do(self, Gn, node_label, edge_label, height, base_kernel):
        """Compute Weisfeiler-Lehman kernels based on a user-defined base kernel between graphs.

        Parameters
        ----------
        Gn : List of NetworkX graph
            List of graphs between which the kernels are computed.
        node_label : string
            Node attribute used as label.
        edge_label : string
            Edge attribute used as label.
        height : int
            Subtree height.
        base_kernel : function
            The base kernel function applied in each iteration of the WL kernel. It is called as base_kernel(Gn, node_label, edge_label) and returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 graphs.

        Return
        ------
        gram_matrix : Numpy matrix
            Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
        """
        # init.
        height = int(height)

        # initial for height = 0
        gram_matrix = base_kernel(Gn, node_label, edge_label)

        # iterate each height
        for h in range(1, height + 1):
            all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
            num_of_labels_occured = 0  # number of distinct compressed labels seen so far in all graphs
            for G in Gn:  # for each graph
                set_multisets = []
                for node in G.nodes(data=True):
                    # Multiset-label determination.
                    multiset = [G.nodes[neighbor][node_label] for neighbor in G[node[0]]]
                    # sorting each multiset
                    multiset.sort()
                    multiset = node[1][node_label] + ''.join(multiset)  # concatenate to a string and add the prefix
                    set_multisets.append(multiset)

                # label compression
                set_unique = list(set(set_multisets))  # set of unique multiset labels
                # a dictionary mapping original labels to new ones.
                set_compressed = {}
                # if a label occurred before, assign its former compressed label, else assign the number of labels occurred + 1 as the compressed label
                for value in set_unique:
                    if value in all_set_compressed.keys():
                        set_compressed.update({value: all_set_compressed[value]})
                    else:
                        set_compressed.update({value: str(num_of_labels_occured + 1)})
                        num_of_labels_occured += 1

                all_set_compressed.update(set_compressed)

                # relabel nodes (set_multisets is in node iteration order, so index by position)
                for idx, node in enumerate(G.nodes(data=True)):
                    node[1][node_label] = set_compressed[set_multisets[idx]]

            # Compute kernel with h iterations and add it to the final kernel
            gram_matrix += base_kernel(Gn, node_label, edge_label)

        return gram_matrix
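    # A user-defined base kernel for _wl_userkernel_do is any callable with
    # the signature base_kernel(Gn, node_label, edge_label) returning a
    # len(Gn) x len(Gn) Numpy matrix. An illustrative (hypothetical) example
    # that counts pairs of equally labeled nodes between each pair of graphs:
    #
    #     def node_label_count_kernel(Gn, node_label, edge_label):
    #         gm = np.zeros((len(Gn), len(Gn)))
    #         for i in range(len(Gn)):
    #             for j in range(i, len(Gn)):
    #                 c1 = Counter(nx.get_node_attributes(Gn[i], node_label).values())
    #                 c2 = Counter(nx.get_node_attributes(Gn[j], node_label).values())
    #                 gm[i][j] = sum(c1[k] * c2[k] for k in c1)
    #                 gm[j][i] = gm[i][j]
    #         return gm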
    def _add_dummy_node_labels(self, Gn):
        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
            for i in range(len(Gn)):
                nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
            self._node_labels = [SpecialLabel.DUMMY]


class WLSubtree(WeisfeilerLehman):

    def __init__(self, **kwargs):
        kwargs['base_kernel'] = 'subtree'
        super().__init__(**kwargs)
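
# A minimal usage sketch (illustrative, not part of the original file). It
# assumes the gklearn imports at the top of this module resolve; the graphs,
# the 'atom' label name, and the direct call to pairwise_kernel are chosen
# here only for demonstration.
if __name__ == '__main__':
    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
    g1.add_edges_from([(0, 1), (1, 2)])

    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    g2.add_edge(0, 1)

    # WL subtree kernel with 2 refinement iterations over the 'atom' labels.
    kernel = WLSubtree(node_labels=['atom'], height=2)
    print(kernel.pairwise_kernel(g1, g2))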
