You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

common_walk.py 8.8 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Aug 18 11:21:31 2020
  5. @author: ljia
  6. @references:
  7. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels:
  8. Hardness results and efficient alternatives. Learning Theory and Kernel
  9. Machines, pages 129–143, 2003.
  10. """
  11. import sys
  12. from gklearn.utils import get_iters
  13. import numpy as np
  14. import networkx as nx
  15. from gklearn.utils import SpecialLabel
  16. from gklearn.utils.parallel import parallel_gm, parallel_me
  17. from gklearn.utils.utils import direct_product_graph
  18. from gklearn.kernels import GraphKernel
  19. class CommonWalk(GraphKernel):
  20. def __init__(self, **kwargs):
  21. GraphKernel.__init__(self)
  22. self._node_labels = kwargs.get('node_labels', [])
  23. self._edge_labels = kwargs.get('edge_labels', [])
  24. self._weight = kwargs.get('weight', 1)
  25. self._compute_method = kwargs.get('compute_method', None)
  26. self._ds_infos = kwargs.get('ds_infos', {})
  27. self._compute_method = self._compute_method.lower()
  28. def _compute_gm_series(self):
  29. self._check_graphs(self._graphs)
  30. self._add_dummy_labels(self._graphs)
  31. if not self._ds_infos['directed']: # convert
  32. self._graphs = [G.to_directed() for G in self._graphs]
  33. # compute Gram matrix.
  34. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  35. from itertools import combinations_with_replacement
  36. itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
  37. len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
  38. iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
  39. length=len_itr, verbose=(self.verbose >= 2))
  40. # direct product graph method - exponential
  41. if self._compute_method == 'exp':
  42. for i, j in iterator:
  43. kernel = self._kernel_do_exp(self._graphs[i], self._graphs[j], self._weight)
  44. gram_matrix[i][j] = kernel
  45. gram_matrix[j][i] = kernel
  46. # direct product graph method - geometric
  47. elif self._compute_method == 'geo':
  48. for i, j in iterator:
  49. kernel = self._kernel_do_geo(self._graphs[i], self._graphs[j], self._weight)
  50. gram_matrix[i][j] = kernel
  51. gram_matrix[j][i] = kernel
  52. return gram_matrix
  53. def _compute_gm_imap_unordered(self):
  54. self._check_graphs(self._graphs)
  55. self._add_dummy_labels(self._graphs)
  56. if not self._ds_infos['directed']: # convert
  57. self._graphs = [G.to_directed() for G in self._graphs]
  58. # compute Gram matrix.
  59. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  60. # def init_worker(gn_toshare):
  61. # global G_gn
  62. # G_gn = gn_toshare
  63. # direct product graph method - exponential
  64. if self._compute_method == 'exp':
  65. do_fun = self._wrapper_kernel_do_exp
  66. # direct product graph method - geometric
  67. elif self._compute_method == 'geo':
  68. do_fun = self._wrapper_kernel_do_geo
  69. parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm,
  70. glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
  71. return gram_matrix
  72. def _compute_kernel_list_series(self, g1, g_list):
  73. self._check_graphs(g_list + [g1])
  74. self._add_dummy_labels(g_list + [g1])
  75. if not self._ds_infos['directed']: # convert
  76. g1 = g1.to_directed()
  77. g_list = [G.to_directed() for G in g_list]
  78. # compute kernel list.
  79. kernel_list = [None] * len(g_list)
  80. if self.verbose >= 2:
  81. iterator = get_iters(range(len(g_list)), desc='Computing kernels',
  82. file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
  83. else:
  84. iterator = range(len(g_list))
  85. # direct product graph method - exponential
  86. if self._compute_method == 'exp':
  87. for i in iterator:
  88. kernel = self._kernel_do_exp(g1, g_list[i], self._weight)
  89. kernel_list[i] = kernel
  90. # direct product graph method - geometric
  91. elif self._compute_method == 'geo':
  92. for i in iterator:
  93. kernel = self._kernel_do_geo(g1, g_list[i], self._weight)
  94. kernel_list[i] = kernel
  95. return kernel_list
  96. def _compute_kernel_list_imap_unordered(self, g1, g_list):
  97. self._check_graphs(g_list + [g1])
  98. self._add_dummy_labels(g_list + [g1])
  99. if not self._ds_infos['directed']: # convert
  100. g1 = g1.to_directed()
  101. g_list = [G.to_directed() for G in g_list]
  102. # compute kernel list.
  103. kernel_list = [None] * len(g_list)
  104. # def init_worker(g1_toshare, g_list_toshare):
  105. # global G_g1, G_g_list
  106. # G_g1 = g1_toshare
  107. # G_g_list = g_list_toshare
  108. # direct product graph method - exponential
  109. if self._compute_method == 'exp':
  110. do_fun = self._wrapper_kernel_list_do_exp
  111. # direct product graph method - geometric
  112. elif self._compute_method == 'geo':
  113. do_fun = self._wrapper_kernel_list_do_geo
  114. def func_assign(result, var_to_assign):
  115. var_to_assign[result[0]] = result[1]
  116. itr = range(len(g_list))
  117. len_itr = len(g_list)
  118. parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
  119. init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered',
  120. n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
  121. return kernel_list
  122. def _wrapper_kernel_list_do_exp(self, itr):
  123. return itr, self._kernel_do_exp(G_g1, G_g_list[itr], self._weight)
  124. def _wrapper_kernel_list_do_geo(self, itr):
  125. return itr, self._kernel_do_geo(G_g1, G_g_list[itr], self._weight)
  126. def _compute_single_kernel_series(self, g1, g2):
  127. self._check_graphs([g1] + [g2])
  128. self._add_dummy_labels([g1] + [g2])
  129. if not self._ds_infos['directed']: # convert
  130. g1 = g1.to_directed()
  131. g2 = g2.to_directed()
  132. # direct product graph method - exponential
  133. if self._compute_method == 'exp':
  134. kernel = self._kernel_do_exp(g1, g2, self._weight)
  135. # direct product graph method - geometric
  136. elif self._compute_method == 'geo':
  137. kernel = self._kernel_do_geo(g1, g2, self._weight)
  138. return kernel
  139. def _kernel_do_exp(self, g1, g2, beta):
  140. """Compute common walk graph kernel between 2 graphs using exponential
  141. series.
  142. Parameters
  143. ----------
  144. g1, g2 : NetworkX graphs
  145. Graphs between which the kernels are computed.
  146. beta : integer
  147. Weight.
  148. Return
  149. ------
  150. kernel : float
  151. The common walk Kernel between 2 graphs.
  152. """
  153. # get tensor product / direct product
  154. gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
  155. # return 0 if the direct product graph have no more than 1 node.
  156. if nx.number_of_nodes(gp) < 2:
  157. return 0
  158. A = nx.adjacency_matrix(gp).todense()
  159. ew, ev = np.linalg.eig(A)
  160. # # remove imaginary part if possible.
  161. # # @todo: don't know if it is necessary.
  162. # for i in range(len(ew)):
  163. # if np.abs(ew[i].imag) < 1e-9:
  164. # ew[i] = ew[i].real
  165. # for i in range(ev.shape[0]):
  166. # for j in range(ev.shape[1]):
  167. # if np.abs(ev[i, j].imag) < 1e-9:
  168. # ev[i, j] = ev[i, j].real
  169. D = np.zeros((len(ew), len(ew)), dtype=complex) # @todo: use complex?
  170. for i in range(len(ew)):
  171. D[i][i] = np.exp(beta * ew[i])
  172. exp_D = ev * D * ev.T
  173. kernel = exp_D.sum()
  174. if (kernel.real == 0 and np.abs(kernel.imag) < 1e-9) or np.abs(kernel.imag / kernel.real) < 1e-9:
  175. kernel = kernel.real
  176. return kernel
  177. def _wrapper_kernel_do_exp(self, itr):
  178. i = itr[0]
  179. j = itr[1]
  180. return i, j, self._kernel_do_exp(G_gn[i], G_gn[j], self._weight)
  181. def _kernel_do_geo(self, g1, g2, gamma):
  182. """Compute common walk graph kernel between 2 graphs using geometric
  183. series.
  184. Parameters
  185. ----------
  186. g1, g2 : NetworkX graphs
  187. Graphs between which the kernels are computed.
  188. gamma : integer
  189. Weight.
  190. Return
  191. ------
  192. kernel : float
  193. The common walk Kernel between 2 graphs.
  194. """
  195. # get tensor product / direct product
  196. gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
  197. # return 0 if the direct product graph have no more than 1 node.
  198. if nx.number_of_nodes(gp) < 2:
  199. return 0
  200. A = nx.adjacency_matrix(gp).todense()
  201. mat = np.identity(len(A)) - gamma * A
  202. # try:
  203. return mat.I.sum()
  204. # except np.linalg.LinAlgError:
  205. # return np.nan
  206. def _wrapper_kernel_do_geo(self, itr):
  207. i = itr[0]
  208. j = itr[1]
  209. return i, j, self._kernel_do_geo(G_gn[i], G_gn[j], self._weight)
  210. def _check_graphs(self, Gn):
  211. for g in Gn:
  212. if nx.number_of_nodes(g) == 1:
  213. raise Exception('Graphs must contain more than 1 nodes to construct adjacency matrices.')
  214. def _add_dummy_labels(self, Gn):
  215. if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
  216. for i in range(len(Gn)):
  217. nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
  218. self._node_labels = [SpecialLabel.DUMMY]
  219. if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
  220. for i in range(len(Gn)):
  221. nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
  222. self._edge_labels = [SpecialLabel.DUMMY]
  223. def _init_worker_gm(gn_toshare):
  224. global G_gn
  225. G_gn = gn_toshare
  226. def _init_worker_list(g1_toshare, g_list_toshare):
  227. global G_g1, G_g_list
  228. G_g1 = g1_toshare
  229. G_g_list = g_list_toshare

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.