You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

treelet.py 20 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Mon Apr 13 18:02:46 2020
  5. @author: ljia
  6. @references:
  7. [1] Gaüzère B, Brun L, Villemin D. Two new graphs kernels in
  8. chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47.
  9. """
  10. import sys
  11. from multiprocessing import Pool
  12. from tqdm import tqdm
  13. import numpy as np
  14. import networkx as nx
  15. from collections import Counter
  16. from itertools import chain
  17. from gklearn.utils.parallel import parallel_gm, parallel_me
  18. from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs
  19. from gklearn.kernels import GraphKernel
  20. class Treelet(GraphKernel):
  21. def __init__(self, **kwargs):
  22. GraphKernel.__init__(self)
  23. self.__node_labels = kwargs.get('node_labels', [])
  24. self.__edge_labels = kwargs.get('edge_labels', [])
  25. self.__sub_kernel = kwargs.get('sub_kernel', None)
  26. self.__ds_infos = kwargs.get('ds_infos', {})
  27. if self.__sub_kernel is None:
  28. raise Exception('Sub kernel not set.')
  29. def _compute_gm_series(self):
  30. self.__add_dummy_labels(self._graphs)
  31. # get all canonical keys of all graphs before calculating kernels to save
  32. # time, but this may cost a lot of memory for large dataset.
  33. canonkeys = []
  34. if self._verbose >= 2:
  35. iterator = tqdm(self._graphs, desc='getting canonkeys', file=sys.stdout)
  36. else:
  37. iterator = self._graphs
  38. for g in iterator:
  39. canonkeys.append(self.__get_canonkeys(g))
  40. # compute Gram matrix.
  41. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  42. from itertools import combinations_with_replacement
  43. itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
  44. if self._verbose >= 2:
  45. iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
  46. else:
  47. iterator = itr
  48. for i, j in iterator:
  49. kernel = self.__kernel_do(canonkeys[i], canonkeys[j])
  50. gram_matrix[i][j] = kernel
  51. gram_matrix[j][i] = kernel # @todo: no directed graph considered?
  52. return gram_matrix
    def _compute_gm_imap_unordered(self):
        """Compute the Gram matrix using a multiprocessing pool."""
        self.__add_dummy_labels(self._graphs)

        # get all canonical keys of all graphs before calculating kernels to save
        # time, but this may cost a lot of memory for large dataset.
        pool = Pool(self._n_jobs)
        itr = zip(self._graphs, range(0, len(self._graphs)))
        # Small workloads get one chunk per worker; otherwise cap chunks at 100.
        if len(self._graphs) < 100 * self._n_jobs:
            chunksize = int(len(self._graphs) / self._n_jobs) + 1
        else:
            chunksize = 100
        canonkeys = [[] for _ in range(len(self._graphs))]
        get_fun = self._wrapper_get_canonkeys
        if self._verbose >= 2:
            iterator = tqdm(pool.imap_unordered(get_fun, itr, chunksize),
                desc='getting canonkeys', file=sys.stdout)
        else:
            iterator = pool.imap_unordered(get_fun, itr, chunksize)
        # Results arrive out of order; the worker returns (index, keys) so we
        # can put each result at the right position.
        for i, ck in iterator:
            canonkeys[i] = ck
        pool.close()
        pool.join()

        # compute Gram matrix.
        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

        def init_worker(canonkeys_toshare):
            # Share the canonkey list with workers via a module-level global
            # (read back in _wrapper_kernel_do).
            global G_canonkeys
            G_canonkeys = canonkeys_toshare
        do_fun = self._wrapper_kernel_do
        parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
            glbv=(canonkeys,), n_jobs=self._n_jobs, verbose=self._verbose)
        return gram_matrix
  83. def _compute_kernel_list_series(self, g1, g_list):
  84. self.__add_dummy_labels(g_list + [g1])
  85. # get all canonical keys of all graphs before calculating kernels to save
  86. # time, but this may cost a lot of memory for large dataset.
  87. canonkeys_1 = self.__get_canonkeys(g1)
  88. canonkeys_list = []
  89. if self._verbose >= 2:
  90. iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout)
  91. else:
  92. iterator = g_list
  93. for g in iterator:
  94. canonkeys_list.append(self.__get_canonkeys(g))
  95. # compute kernel list.
  96. kernel_list = [None] * len(g_list)
  97. if self._verbose >= 2:
  98. iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
  99. else:
  100. iterator = range(len(g_list))
  101. for i in iterator:
  102. kernel = self.__kernel_do(canonkeys_1, canonkeys_list[i])
  103. kernel_list[i] = kernel
  104. return kernel_list
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        """Compute kernels between g1 and each graph in g_list using a pool."""
        self.__add_dummy_labels(g_list + [g1])

        # get all canonical keys of all graphs before calculating kernels to save
        # time, but this may cost a lot of memory for large dataset.
        canonkeys_1 = self.__get_canonkeys(g1)
        canonkeys_list = [[] for _ in range(len(g_list))]
        pool = Pool(self._n_jobs)
        itr = zip(g_list, range(0, len(g_list)))
        # Small workloads get one chunk per worker; otherwise cap chunks at 100.
        if len(g_list) < 100 * self._n_jobs:
            chunksize = int(len(g_list) / self._n_jobs) + 1
        else:
            chunksize = 100
        get_fun = self._wrapper_get_canonkeys
        if self._verbose >= 2:
            iterator = tqdm(pool.imap_unordered(get_fun, itr, chunksize),
                desc='getting canonkeys', file=sys.stdout)
        else:
            iterator = pool.imap_unordered(get_fun, itr, chunksize)
        # Results arrive out of order; the worker returns (index, keys).
        for i, ck in iterator:
            canonkeys_list[i] = ck
        pool.close()
        pool.join()

        # compute kernel list.
        kernel_list = [None] * len(g_list)

        def init_worker(ck_1_toshare, ck_list_toshare):
            # Share the canonkeys with workers via module-level globals
            # (read back in _wrapper_kernel_list_do).
            global G_ck_1, G_ck_list
            G_ck_1 = ck_1_toshare
            G_ck_list = ck_list_toshare
        do_fun = self._wrapper_kernel_list_do

        def func_assign(result, var_to_assign):
            # result is (index, kernel value); write it into the output list.
            var_to_assign[result[0]] = result[1]
        itr = range(len(g_list))
        len_itr = len(g_list)
        parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
            init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered',
            n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
        return kernel_list
    def _wrapper_kernel_list_do(self, itr):
        # Worker-side callback: G_ck_1 / G_ck_list are module-level globals
        # installed by init_worker in _compute_kernel_list_imap_unordered.
        return itr, self.__kernel_do(G_ck_1, G_ck_list[itr])
  144. def _compute_single_kernel_series(self, g1, g2):
  145. self.__add_dummy_labels([g1] + [g2])
  146. canonkeys_1 = self.__get_canonkeys(g1)
  147. canonkeys_2 = self.__get_canonkeys(g2)
  148. kernel = self.__kernel_do(canonkeys_1, canonkeys_2)
  149. return kernel
  150. def __kernel_do(self, canonkey1, canonkey2):
  151. """Calculate treelet graph kernel between 2 graphs.
  152. Parameters
  153. ----------
  154. canonkey1, canonkey2 : list
  155. List of canonical keys in 2 graphs, where each key is represented by a string.
  156. Return
  157. ------
  158. kernel : float
  159. Treelet Kernel between 2 graphs.
  160. """
  161. keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs
  162. vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys])
  163. vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys])
  164. kernel = self.__sub_kernel(vector1, vector2)
  165. return kernel
  166. def _wrapper_kernel_do(self, itr):
  167. i = itr[0]
  168. j = itr[1]
  169. return i, j, self.__kernel_do(G_canonkeys[i], G_canonkeys[j])
    def __get_canonkeys(self, G):
        """Generate canonical keys of all treelets in a graph.

        Parameters
        ----------
        G : NetworkX graphs
            The graph in which keys are generated.

        Return
        ------
        canonkey/canonkey_l : dict
            For unlabeled graphs, canonkey is a dictionary which records amount of
            every tree pattern. For labeled graphs, canonkey_l is one which keeps
            track of amount of every treelet.
        """
        patterns = {}  # a dictionary which consists of lists of patterns for all graphlet.
        canonkey = {}  # canonical key, a dictionary which records amount of every tree pattern.

        ### structural analysis ###
        ### In this section, a list of patterns is generated for each graphlet,
        ### where every pattern is represented by nodes ordered by Morgan's
        ### extended labeling.

        # linear patterns: single nodes ('0') and paths with 1..5 edges.
        patterns['0'] = list(G.nodes())
        canonkey['0'] = nx.number_of_nodes(G)
        for i in range(1, 6):
            patterns[str(i)] = find_all_paths(G, i, self.__ds_infos['directed'])
            canonkey[str(i)] = len(patterns[str(i)])

        # n-star patterns: [center, neighbor, ...] for every node of degree n.
        patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3]
        patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4]
        patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5]
        canonkey['6'] = len(patterns['3star'])
        canonkey['8'] = len(patterns['4star'])
        canonkey['d'] = len(patterns['5star'])

        # pattern 7
        patterns['7'] = []  # the 1st line of Table 1 in Ref [1]
        for pattern in patterns['3star']:
            for i in range(1, len(pattern)):  # for each neighbor of node 0
                if G.degree(pattern[i]) >= 2:
                    pattern_t = pattern[:]
                    # set the node with degree >= 2 as the 4th node
                    pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]
                    for neighborx in G[pattern[i]]:
                        if neighborx != pattern[0]:
                            new_pattern = pattern_t + [neighborx]
                            patterns['7'].append(new_pattern)
        canonkey['7'] = len(patterns['7'])

        # pattern 11
        patterns['11'] = []  # the 4th line of Table 1 in Ref [1]
        for pattern in patterns['4star']:
            for i in range(1, len(pattern)):
                if G.degree(pattern[i]) >= 2:
                    pattern_t = pattern[:]
                    pattern_t[i], pattern_t[4] = pattern_t[4], pattern_t[i]
                    for neighborx in G[pattern[i]]:
                        if neighborx != pattern[0]:
                            new_pattern = pattern_t + [neighborx]
                            patterns['11'].append(new_pattern)
        canonkey['b'] = len(patterns['11'])

        # pattern 12
        patterns['12'] = []  # the 5th line of Table 1 in Ref [1]
        rootlist = []  # a list of root nodes, whose extended labels are 3
        for pattern in patterns['3star']:
            if pattern[0] not in rootlist:  # prevent to count the same pattern twice from each of the two root nodes
                rootlist.append(pattern[0])
                for i in range(1, len(pattern)):
                    if G.degree(pattern[i]) >= 3:
                        rootlist.append(pattern[i])
                        pattern_t = pattern[:]
                        pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]
                        for neighborx1 in G[pattern[i]]:
                            if neighborx1 != pattern[0]:
                                for neighborx2 in G[pattern[i]]:
                                    if neighborx1 > neighborx2 and neighborx2 != pattern[0]:
                                        new_pattern = pattern_t + [neighborx1] + [neighborx2]
                                        patterns['12'].append(new_pattern)
        # each pattern-12 treelet is discovered from both of its two centers,
        # so the count is halved.
        canonkey['c'] = int(len(patterns['12']) / 2)

        # pattern 9
        patterns['9'] = []  # the 2nd line of Table 1 in Ref [1]
        for pattern in patterns['3star']:
            # all ordered pairs of distinct degree->=2 neighbors of the center.
            for pairs in [ [neighbor1, neighbor2] for neighbor1 in G[pattern[0]] if G.degree(neighbor1) >= 2 \
                for neighbor2 in G[pattern[0]] if G.degree(neighbor2) >= 2 if neighbor1 > neighbor2]:
                pattern_t = pattern[:]
                # move nodes with extended labels 4 to specific position to correspond to their children
                pattern_t[pattern_t.index(pairs[0])], pattern_t[2] = pattern_t[2], pattern_t[pattern_t.index(pairs[0])]
                pattern_t[pattern_t.index(pairs[1])], pattern_t[3] = pattern_t[3], pattern_t[pattern_t.index(pairs[1])]
                for neighborx1 in G[pairs[0]]:
                    if neighborx1 != pattern[0]:
                        for neighborx2 in G[pairs[1]]:
                            if neighborx2 != pattern[0]:
                                new_pattern = pattern_t + [neighborx1] + [neighborx2]
                                patterns['9'].append(new_pattern)
        canonkey['9'] = len(patterns['9'])

        # pattern 10
        patterns['10'] = []  # the 3rd line of Table 1 in Ref [1]
        for pattern in patterns['3star']:
            for i in range(1, len(pattern)):
                if G.degree(pattern[i]) >= 2:
                    for neighborx in G[pattern[i]]:
                        if neighborx != pattern[0] and G.degree(neighborx) >= 2:
                            pattern_t = pattern[:]
                            pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]
                            new_patterns = [ pattern_t + [neighborx] + [neighborxx] for neighborxx in G[neighborx] if neighborxx != pattern[i] ]
                            patterns['10'].extend(new_patterns)
        canonkey['a'] = len(patterns['10'])

        ### labeling information ###
        ### In this section, a list of canonical keys is generated for every
        ### pattern obtained in the structural analysis section above, which is a
        ### string corresponding to a unique treelet. A dictionary is built to keep
        ### track of the amount of every treelet.
        if len(self.__node_labels) > 0 or len(self.__edge_labels) > 0:
            canonkey_l = {}  # canonical key, a dictionary which keeps track of amount of every treelet.

            # linear patterns
            canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.__node_labels))
            for key in canonkey_t:
                canonkey_l[('0', key)] = canonkey_t[key]

            # paths with 1..5 edges: interleave node and edge labels along the path.
            for i in range(1, 6):
                treelet = []
                for pattern in patterns[str(i)]:
                    canonlist = []
                    for idx, node in enumerate(pattern[:-1]):
                        canonlist.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
                        canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.__edge_labels))
                    canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.__node_labels))
                    # a path reads the same from either end; keep the
                    # lexicographically smaller direction as the canonical one.
                    canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1]
                    treelet.append(tuple([str(i)] + canonkey_t))
                canonkey_l.update(Counter(treelet))

            # n-star patterns
            for i in range(3, 6):
                treelet = []
                for pattern in patterns[str(i) + 'star']:
                    canonlist = []
                    for leaf in pattern[1:]:
                        nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
                        elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
                        canonlist.append(tuple((nlabels, elabels)))
                    # leaves are interchangeable; sorting gives a canonical order.
                    canonlist.sort()
                    canonlist = list(chain.from_iterable(canonlist))
                    canonkey_t = tuple(['d' if i == 5 else str(i * 2)] +
                        [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
                        + canonlist)
                    treelet.append(canonkey_t)
                canonkey_l.update(Counter(treelet))

            # pattern 7
            treelet = []
            for pattern in patterns['7']:
                canonlist = []
                for leaf in pattern[1:3]:
                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
                    canonlist.append(tuple((nlabels, elabels)))
                canonlist.sort()
                canonlist = list(chain.from_iterable(canonlist))
                canonkey_t = tuple(['7']
                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist
                    + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
                    + [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
                    + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
                    + [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)])
                treelet.append(canonkey_t)
            canonkey_l.update(Counter(treelet))

            # pattern 11
            treelet = []
            for pattern in patterns['11']:
                canonlist = []
                for leaf in pattern[1:4]:
                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
                    canonlist.append(tuple((nlabels, elabels)))
                canonlist.sort()
                canonlist = list(chain.from_iterable(canonlist))
                canonkey_t = tuple(['b']
                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist
                    + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
                    + [tuple(G[pattern[4]][pattern[0]][el] for el in self.__edge_labels)]
                    + [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels)]
                    + [tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)])
                treelet.append(canonkey_t)
            canonkey_l.update(Counter(treelet))

            # pattern 10
            treelet = []
            for pattern in patterns['10']:
                canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
                    tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)]
                canonlist = []
                for leaf in pattern[1:3]:
                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
                    canonlist.append(tuple((nlabels, elabels)))
                canonlist.sort()
                canonkey0 = list(chain.from_iterable(canonlist))
                canonkey_t = tuple(['a']
                    + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
                    + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
                    + [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)]
                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
                    + [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)]
                    + canonkey4 + canonkey0)
                treelet.append(canonkey_t)
            canonkey_l.update(Counter(treelet))

            # pattern 12
            treelet = []
            for pattern in patterns['12']:
                canonlist0 = []
                for leaf in pattern[1:3]:
                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
                    canonlist0.append(tuple((nlabels, elabels)))
                canonlist0.sort()
                canonlist0 = list(chain.from_iterable(canonlist0))
                canonlist3 = []
                for leaf in pattern[4:6]:
                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
                    elabels = tuple(G[leaf][pattern[3]][el] for el in self.__edge_labels)
                    canonlist3.append(tuple((nlabels, elabels)))
                canonlist3.sort()
                canonlist3 = list(chain.from_iterable(canonlist3))

                # 2 possible key can be generated from 2 nodes with extended label 3,
                # select the one with lower lexicographic order.
                canonkey_t1 = tuple(['c']
                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist0
                    + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
                    + [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
                    + canonlist3)
                canonkey_t2 = tuple(['c']
                    + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] + canonlist3
                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
                    + [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)]
                    + canonlist0)
                treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2)
            canonkey_l.update(Counter(treelet))

            # pattern 9
            treelet = []
            for pattern in patterns['9']:
                canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels),
                    tuple(G[pattern[4]][pattern[2]][el] for el in self.__edge_labels)]
                canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
                    tuple(G[pattern[5]][pattern[3]][el] for el in self.__edge_labels)]
                prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.__node_labels),
                    tuple(G[pattern[2]][pattern[0]][el] for el in self.__edge_labels)]
                prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels),
                    tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
                # order the two labeled branches canonically by their
                # (parent, child) key sequences.
                if prekey2 + canonkey2 < prekey3 + canonkey3:
                    canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
                        + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
                        + prekey2 + prekey3 + canonkey2 + canonkey3
                else:
                    canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
                        + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
                        + prekey3 + prekey2 + canonkey3 + canonkey2
                treelet.append(tuple(['9']
                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
                    + canonkey_t))
            canonkey_l.update(Counter(treelet))

            return canonkey_l

        return canonkey
  426. def _wrapper_get_canonkeys(self, itr_item):
  427. g = itr_item[0]
  428. i = itr_item[1]
  429. return i, self.__get_canonkeys(g)
  430. def __add_dummy_labels(self, Gn):
  431. if len(self.__node_labels) == 0:
  432. for G in Gn:
  433. nx.set_node_attributes(G, '0', 'dummy')
  434. self.__node_labels.append('dummy')
  435. if len(self.__edge_labels) == 0:
  436. for G in Gn:
  437. nx.set_edge_attributes(G, '0', 'dummy')
  438. self.__edge_labels.append('dummy')

A Python package for graph kernels, graph edit distances and the graph pre-image problem.