
path_up_to_h.py 20 kB

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 10 18:33:13 2020

@author: ljia

@references:

    [1] Liva Ralaivola, Sanjay J Swamidass, Hiroto Saigo, and Pierre
    Baldi. Graph kernels for chemical informatics. Neural Networks,
    18(8):1093–1110, 2005.
"""
import sys
from collections import Counter
from functools import partial
from itertools import combinations_with_replacement
from multiprocessing import Pool

import numpy as np
import networkx as nx
from tqdm import tqdm

from gklearn.utils import SpecialLabel, Trie
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import GraphKernel


class PathUpToH(GraphKernel):  # @todo: add support for the case when k_func is None.

    def __init__(self, **kwargs):
        GraphKernel.__init__(self)
        self._node_labels = kwargs.get('node_labels', [])
        self._edge_labels = kwargs.get('edge_labels', [])
        self._depth = int(kwargs.get('depth', 10))
        self._k_func = kwargs.get('k_func', 'MinMax')
        self._compute_method = kwargs.get('compute_method', 'trie')
        self._ds_infos = kwargs.get('ds_infos', {})

    def _compute_gm_series(self):
        self._add_dummy_labels(self._graphs)

        itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
        if self._verbose >= 2:
            iterator_ps = tqdm(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout)
            iterator_kernel = tqdm(itr_kernel, desc='Computing kernels', file=sys.stdout)
        else:
            iterator_ps = range(0, len(self._graphs))
            iterator_kernel = itr_kernel

        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

        if self._compute_method == 'trie':
            all_paths = [self._find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
            for i, j in iterator_kernel:
                kernel = self._kernel_do_trie(all_paths[i], all_paths[j])
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel
        else:
            all_paths = [self._find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
            for i, j in iterator_kernel:
                kernel = self._kernel_do_naive(all_paths[i], all_paths[j])
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel

        return gram_matrix

    def _compute_gm_imap_unordered(self):
        self._add_dummy_labels(self._graphs)

        # Get all paths of all graphs before computing kernels to save time,
        # but this may cost a lot of memory for large datasets.
        pool = Pool(self._n_jobs)
        itr = zip(self._graphs, range(0, len(self._graphs)))
        if len(self._graphs) < 100 * self._n_jobs:
            chunksize = int(len(self._graphs) / self._n_jobs) + 1
        else:
            chunksize = 100
        all_paths = [[] for _ in range(len(self._graphs))]
        if self._compute_method == 'trie' and self._k_func is not None:
            get_ps_fun = self._wrapper_find_all_path_as_trie
        elif self._compute_method != 'trie' and self._k_func is not None:
            get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
        else:
            get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
        if self._verbose >= 2:
            iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize),
                            desc='getting paths', file=sys.stdout)
        else:
            iterator = pool.imap_unordered(get_ps_fun, itr, chunksize)
        for i, ps in iterator:
            all_paths[i] = ps
        pool.close()
        pool.join()

        # Compute the Gram matrix.
        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

        if self._compute_method == 'trie' and self._k_func is not None:
            def init_worker(trie_toshare):
                global G_trie
                G_trie = trie_toshare
            do_fun = self._wrapper_kernel_do_trie
        elif self._compute_method != 'trie' and self._k_func is not None:
            def init_worker(plist_toshare):
                global G_plist
                G_plist = plist_toshare
            do_fun = self._wrapper_kernel_do_naive
        else:
            def init_worker(plist_toshare):
                global G_plist
                G_plist = plist_toshare
            do_fun = self._wrapper_kernel_do_kernelless  # @todo: what is this?
        parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
                    glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose)

        return gram_matrix

    def _compute_kernel_list_series(self, g1, g_list):
        self._add_dummy_labels(g_list + [g1])

        if self._verbose >= 2:
            iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout)
            iterator_kernel = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
        else:
            iterator_ps = g_list
            iterator_kernel = range(len(g_list))

        kernel_list = [None] * len(g_list)

        if self._compute_method == 'trie':
            paths_g1 = self._find_all_path_as_trie(g1)
            paths_g_list = [self._find_all_path_as_trie(g) for g in iterator_ps]
            for i in iterator_kernel:
                kernel = self._kernel_do_trie(paths_g1, paths_g_list[i])
                kernel_list[i] = kernel
        else:
            paths_g1 = self._find_all_paths_until_length(g1)
            paths_g_list = [self._find_all_paths_until_length(g) for g in iterator_ps]
            for i in iterator_kernel:
                kernel = self._kernel_do_naive(paths_g1, paths_g_list[i])
                kernel_list[i] = kernel

        return kernel_list

    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        self._add_dummy_labels(g_list + [g1])

        # Get all paths of all graphs before computing kernels to save time,
        # but this may cost a lot of memory for large datasets.
        pool = Pool(self._n_jobs)
        itr = zip(g_list, range(0, len(g_list)))
        if len(g_list) < 100 * self._n_jobs:
            chunksize = int(len(g_list) / self._n_jobs) + 1
        else:
            chunksize = 100
        paths_g_list = [[] for _ in range(len(g_list))]
        if self._compute_method == 'trie' and self._k_func is not None:
            paths_g1 = self._find_all_path_as_trie(g1)
            get_ps_fun = self._wrapper_find_all_path_as_trie
        elif self._compute_method != 'trie' and self._k_func is not None:
            paths_g1 = self._find_all_paths_until_length(g1)
            get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
        else:
            paths_g1 = self._find_all_paths_until_length(g1)
            get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
        if self._verbose >= 2:
            iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize),
                            desc='getting paths', file=sys.stdout)
        else:
            iterator = pool.imap_unordered(get_ps_fun, itr, chunksize)
        for i, ps in iterator:
            paths_g_list[i] = ps
        pool.close()
        pool.join()

        # Compute the kernel list.
        kernel_list = [None] * len(g_list)

        def init_worker(p1_toshare, plist_toshare):
            global G_p1, G_plist
            G_p1 = p1_toshare
            G_plist = plist_toshare

        do_fun = self._wrapper_kernel_list_do

        def func_assign(result, var_to_assign):
            var_to_assign[result[0]] = result[1]

        itr = range(len(g_list))
        len_itr = len(g_list)
        parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                    init_worker=init_worker, glbv=(paths_g1, paths_g_list),
                    method='imap_unordered', n_jobs=self._n_jobs,
                    itr_desc='Computing kernels', verbose=self._verbose)

        return kernel_list

    def _wrapper_kernel_list_do(self, itr):
        if self._compute_method == 'trie' and self._k_func is not None:
            return itr, self._kernel_do_trie(G_p1, G_plist[itr])
        elif self._compute_method != 'trie' and self._k_func is not None:
            return itr, self._kernel_do_naive(G_p1, G_plist[itr])
        else:
            return itr, self._kernel_do_kernelless(G_p1, G_plist[itr])

    def _compute_single_kernel_series(self, g1, g2):
        self._add_dummy_labels([g1] + [g2])

        if self._compute_method == 'trie':
            paths_g1 = self._find_all_path_as_trie(g1)
            paths_g2 = self._find_all_path_as_trie(g2)
            kernel = self._kernel_do_trie(paths_g1, paths_g2)
        else:
            paths_g1 = self._find_all_paths_until_length(g1)
            paths_g2 = self._find_all_paths_until_length(g2)
            kernel = self._kernel_do_naive(paths_g1, paths_g2)

        return kernel

    def _kernel_do_trie(self, trie1, trie2):
        """Compute the path kernel up to depth h between 2 graphs using tries.

        The kernel function given by self._k_func ('tanimoto' or 'MinMax') is
        applied as the notion of fingerprint similarity.

        Parameters
        ----------
        trie1, trie2 : Trie
            Tries that contain all paths of the 2 graphs.

        Return
        ------
        kernel : float
            Path kernel up to h between the 2 graphs.
        """
        if self._k_func == 'tanimoto':
            # Traverse all paths in graph1 and search them in graph2.
            # Depth-first search is applied.
            def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
                for key, node in root['children'].items():
                    pcurrent.append(key)
                    if node['isEndOfWord']:
                        setlist[1] += 1
                        count2 = trie2.searchWord(pcurrent)
                        if count2 != 0:
                            setlist[0] += 1
                    if node['children'] != {}:
                        traverseTrie1t(node, trie2, setlist, pcurrent)
                    else:
                        del pcurrent[-1]
                if pcurrent != []:
                    del pcurrent[-1]

            # Traverse all paths in graph2 and find those that are not in
            # graph1. Depth-first search is applied.
            def traverseTrie2t(root, trie1, setlist, pcurrent=[]):
                for key, node in root['children'].items():
                    pcurrent.append(key)
                    if node['isEndOfWord']:
                        count1 = trie1.searchWord(pcurrent)
                        if count1 == 0:
                            setlist[1] += 1
                    if node['children'] != {}:
                        traverseTrie2t(node, trie1, setlist, pcurrent)
                    else:
                        del pcurrent[-1]
                if pcurrent != []:
                    del pcurrent[-1]

            setlist = [0, 0]  # sizes of the intersection and union of the path sets of g1, g2.
            traverseTrie1t(trie1.root, trie2, setlist)
            traverseTrie2t(trie2.root, trie1, setlist)
            kernel = setlist[0] / setlist[1]
        elif self._k_func == 'MinMax':  # MinMax kernel
            # Traverse all paths in graph1 and search them in graph2.
            # Depth-first search is applied.
            def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
                for key, node in root['children'].items():
                    pcurrent.append(key)
                    if node['isEndOfWord']:
                        count1 = node['count']
                        count2 = trie2.searchWord(pcurrent)
                        sumlist[0] += min(count1, count2)
                        sumlist[1] += max(count1, count2)
                    if node['children'] != {}:
                        traverseTrie1m(node, trie2, sumlist, pcurrent)
                    else:
                        del pcurrent[-1]
                if pcurrent != []:
                    del pcurrent[-1]

            # Traverse all paths in graph2 and find those that are not in
            # graph1. Depth-first search is applied.
            def traverseTrie2m(root, trie1, sumlist, pcurrent=[]):
                for key, node in root['children'].items():
                    pcurrent.append(key)
                    if node['isEndOfWord']:
                        count1 = trie1.searchWord(pcurrent)
                        if count1 == 0:
                            sumlist[1] += node['count']
                    if node['children'] != {}:
                        traverseTrie2m(node, trie1, sumlist, pcurrent)
                    else:
                        del pcurrent[-1]
                if pcurrent != []:
                    del pcurrent[-1]

            sumlist = [0, 0]  # sum of min counts and sum of max counts.
            traverseTrie1m(trie1.root, trie2, sumlist)
            traverseTrie2m(trie2.root, trie1, sumlist)
            kernel = sumlist[0] / sumlist[1]
        else:
            raise Exception('The given "k_func" cannot be recognized. Possible choices include: "tanimoto" and "MinMax".')

        return kernel

    def _wrapper_kernel_do_trie(self, itr):
        i, j = itr
        return i, j, self._kernel_do_trie(G_trie[i], G_trie[j])

    def _kernel_do_naive(self, paths1, paths2):
        """Compute the path kernel up to depth h between 2 graphs naively.

        The kernel function given by self._k_func ('tanimoto', 'MinMax' or
        None) is applied as the notion of fingerprint similarity.

        Parameters
        ----------
        paths1, paths2 : list
            Lists of all paths in the 2 graphs, where for unlabeled graphs each
            path is represented by a list of nodes, while for labeled graphs
            each path is represented by a sequence consisting of the labels of
            the nodes and/or edges on that path.

        Return
        ------
        kernel : float
            Path kernel up to h between the 2 graphs.
        """
        all_paths = list(set(paths1 + paths2))

        if self._k_func == 'tanimoto':
            length_union = len(all_paths)
            kernel = (len(set(paths1)) + len(set(paths2)) -
                      length_union) / length_union
        elif self._k_func == 'MinMax':  # MinMax kernel
            path_count1 = Counter(paths1)
            path_count2 = Counter(paths2)
            # A Counter returns 0 for missing keys.
            vector1 = [path_count1[key] for key in all_paths]
            vector2 = [path_count2[key] for key in all_paths]
            kernel = np.sum(np.minimum(vector1, vector2)) / \
                np.sum(np.maximum(vector1, vector2))
        elif self._k_func is None:  # no sub-kernel used; compare paths directly.
            path_count1 = Counter(paths1)
            path_count2 = Counter(paths2)
            vector1 = [path_count1[key] for key in all_paths]
            vector2 = [path_count2[key] for key in all_paths]
            kernel = np.dot(vector1, vector2)
        else:
            raise Exception('The given "k_func" cannot be recognized. Possible choices include: "tanimoto", "MinMax" and None.')

        return kernel

    def _wrapper_kernel_do_naive(self, itr):
        i, j = itr
        return i, j, self._kernel_do_naive(G_plist[i], G_plist[j])

    def _find_all_path_as_trie(self, G):
        # Traverse all paths up to length h in the graph and construct a trie
        # with them. Depth-first search is applied. Notice that the reverse of
        # each path is also stored in the trie.
        def traverseGraph(root, ptrie, G, pcurrent=[]):
            if len(pcurrent) < self._depth + 1:
                for neighbor in G[root]:
                    if neighbor not in pcurrent:
                        pcurrent.append(neighbor)
                        plstr = self._paths2labelseqs([pcurrent], G)
                        ptrie.insertWord(plstr[0])
                        traverseGraph(neighbor, ptrie, G, pcurrent)
                        del pcurrent[-1]

        ptrie = Trie()
        path_l = [[n] for n in G.nodes]  # paths of length 0 (single nodes).
        path_l_str = self._paths2labelseqs(path_l, G)
        for p in path_l_str:
            ptrie.insertWord(p)
        for n in G.nodes:
            traverseGraph(n, ptrie, G, pcurrent=[n])

        return ptrie
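
    # For a toy graph whose two nodes are labeled 'C' and 'O' and share one
    # edge (node labels only, hypothetical), the trie built above with
    # depth >= 1 stores the label sequences (('C',),), (('O',),),
    # (('C',), ('O',)) and (('O',), ('C',)): every path and its reverse.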

    def _wrapper_find_all_path_as_trie(self, itr_item):
        g, i = itr_item
        return i, self._find_all_path_as_trie(g)

    # @todo: (maybe this method can be removed) this method finds paths
    # repetitively; it could be made faster.
    def _find_all_paths_until_length(self, G, tolabelseqs=True):
        """Find all paths no longer than a certain maximum length in a graph. A
        recursive depth-first search is applied.

        Parameters
        ----------
        G : NetworkX graph
            The graph in which paths are searched. The maximum path length is
            given by self._depth; node and edge labels are taken from
            self._node_labels and self._edge_labels.
        tolabelseqs : boolean
            Whether to transform the retrieved paths into label sequences.

        Return
        ------
        paths : list
            List of paths retrieved, where for unlabeled graphs each path is
            represented by a list of nodes, while for labeled graphs each path
            is represented by a list of strings consisting of the labels of the
            nodes and/or edges on that path.
        """
        path_l = [[n] for n in G.nodes]  # paths of length l, starting at l = 0.
        all_paths = [p.copy() for p in path_l]
        for l in range(1, self._depth + 1):
            path_lplus1 = []
            for path in path_l:
                for neighbor in G[path[-1]]:
                    if neighbor not in path:  # paths do not revisit nodes.
                        path_lplus1.append(path + [neighbor])
            all_paths += path_lplus1
            path_l = [p.copy() for p in path_lplus1]

        return self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths

    def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item):
        g, i = itr_item
        return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs)

    def _paths2labelseqs(self, plist, G):
        # Transform each path (a list of nodes) into a tuple of node and/or
        # edge labels, depending on which labels the dataset carries.
        if len(self._node_labels) > 0:
            if len(self._edge_labels) > 0:
                path_strs = []
                for path in plist:
                    pths_tmp = []
                    for idx, node in enumerate(path[:-1]):
                        pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
                        pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
                    pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self._node_labels))
                    path_strs.append(tuple(pths_tmp))
            else:
                path_strs = []
                for path in plist:
                    pths_tmp = []
                    for node in path:
                        pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
                    path_strs.append(tuple(pths_tmp))
            return path_strs
        else:
            if len(self._edge_labels) > 0:
                path_strs = []
                for path in plist:
                    if len(path) == 1:  # a single node has no edge labels.
                        path_strs.append(tuple())
                    else:
                        pths_tmp = []
                        for idx, node in enumerate(path[:-1]):
                            pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
                        path_strs.append(tuple(pths_tmp))
                return path_strs
            else:
                # Fully unlabeled: represent each node by the dummy label '0'.
                return [tuple(['0' for node in path]) for path in plist]
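
    # Example output of _paths2labelseqs (hypothetical labels): with a node
    # label 'atom' and an edge label 'bond_type', a path [0, 1] whose nodes
    # are labeled 'C' and 'O' and whose edge is labeled '1' is mapped to
    # (('C',), ('1',), ('O',)): node, edge and node label tuples interleaved.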

    def _add_dummy_labels(self, Gn):
        if self._k_func is not None:
            if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
                for i in range(len(Gn)):
                    nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
                self._node_labels = [SpecialLabel.DUMMY]
            if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
                for i in range(len(Gn)):
                    nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
                self._edge_labels = [SpecialLabel.DUMMY]
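

if __name__ == '__main__':
    # A minimal usage sketch (not part of the original module). It assumes the
    # GraphKernel.compute(graphs, **kwargs) interface of gklearn, which
    # returns the Gram matrix together with the elapsed run time; 'parallel',
    # 'n_jobs' and 'verbose' are options read from kwargs, and parallel=None
    # is assumed to select the series computation.
    g1 = nx.path_graph(4)  # unlabeled graphs; dummy labels are added automatically.
    g2 = nx.cycle_graph(5)
    kernel = PathUpToH(depth=3, k_func='MinMax', compute_method='trie',
                       ds_infos={'directed': False})
    gram_matrix, run_time = kernel.compute([g1, g2], parallel=None, verbose=0)
    print(gram_matrix)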

A Python package for graph kernels, graph edit distances and graph pre-image problem.