You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

spectral_decomposition.py 9.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Aug 20 16:12:45 2020
  5. @author: ljia
  6. @references:
  7. [1] S. V. N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
  8. """
  9. import sys
  10. from tqdm import tqdm
  11. import numpy as np
  12. import networkx as nx
  13. from scipy.sparse import kron
  14. from gklearn.utils.parallel import parallel_gm, parallel_me
  15. from gklearn.kernels import RandomWalkMeta
  16. class SpectralDecomposition(RandomWalkMeta):
  17. def __init__(self, **kwargs):
  18. super().__init__(**kwargs)
  19. self._sub_kernel = kwargs.get('sub_kernel', None)
  20. def _compute_gm_series(self):
  21. self._check_edge_weight(self._graphs, self._verbose)
  22. self._check_graphs(self._graphs)
  23. if self._verbose >= 2:
  24. import warnings
  25. warnings.warn('All labels are ignored. Only works for undirected graphs.')
  26. # compute Gram matrix.
  27. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  28. if self._q is None:
  29. # precompute the spectral decomposition of each graph.
  30. P_list = []
  31. D_list = []
  32. if self._verbose >= 2:
  33. iterator = tqdm(self._graphs, desc='spectral decompose', file=sys.stdout)
  34. else:
  35. iterator = self._graphs
  36. for G in iterator:
  37. # don't normalize adjacency matrices if q is a uniform vector. Note
  38. # A actually is the transpose of the adjacency matrix.
  39. A = nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
  40. ew, ev = np.linalg.eig(A)
  41. D_list.append(ew)
  42. P_list.append(ev)
  43. # P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs?
  44. if self._p is None: # p is uniform distribution as default.
  45. q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs]
  46. # q_T_list = [q.T for q in q_list]
  47. from itertools import combinations_with_replacement
  48. itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
  49. if self._verbose >= 2:
  50. iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
  51. else:
  52. iterator = itr
  53. for i, j in iterator:
  54. kernel = self._kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel)
  55. gram_matrix[i][j] = kernel
  56. gram_matrix[j][i] = kernel
  57. else: # @todo
  58. pass
  59. else: # @todo
  60. pass
  61. return gram_matrix
  62. def _compute_gm_imap_unordered(self):
  63. self._check_edge_weight(self._graphs, self._verbose)
  64. self._check_graphs(self._graphs)
  65. if self._verbose >= 2:
  66. import warnings
  67. warnings.warn('All labels are ignored. Only works for undirected graphs.')
  68. # compute Gram matrix.
  69. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  70. if self._q is None:
  71. # precompute the spectral decomposition of each graph.
  72. P_list = []
  73. D_list = []
  74. if self._verbose >= 2:
  75. iterator = tqdm(self._graphs, desc='spectral decompose', file=sys.stdout)
  76. else:
  77. iterator = self._graphs
  78. for G in iterator:
  79. # don't normalize adjacency matrices if q is a uniform vector. Note
  80. # A actually is the transpose of the adjacency matrix.
  81. A = nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
  82. ew, ev = np.linalg.eig(A)
  83. D_list.append(ew)
  84. P_list.append(ev) # @todo: parallel?
  85. if self._p is None: # p is uniform distribution as default.
  86. q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs] # @todo: parallel?
  87. def init_worker(q_T_list_toshare, P_list_toshare, D_list_toshare):
  88. global G_q_T_list, G_P_list, G_D_list
  89. G_q_T_list = q_T_list_toshare
  90. G_P_list = P_list_toshare
  91. G_D_list = D_list_toshare
  92. do_fun = self._wrapper_kernel_do
  93. parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
  94. glbv=(q_T_list, P_list, D_list), n_jobs=self._n_jobs, verbose=self._verbose)
  95. else: # @todo
  96. pass
  97. else: # @todo
  98. pass
  99. return gram_matrix
  100. def _compute_kernel_list_series(self, g1, g_list):
  101. self._check_edge_weight(g_list + [g1], self._verbose)
  102. self._check_graphs(g_list + [g1])
  103. if self._verbose >= 2:
  104. import warnings
  105. warnings.warn('All labels are ignored. Only works for undirected graphs.')
  106. # compute kernel list.
  107. kernel_list = [None] * len(g_list)
  108. if self._q is None:
  109. # precompute the spectral decomposition of each graph.
  110. A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
  111. D1, P1 = np.linalg.eig(A1)
  112. P_list = []
  113. D_list = []
  114. if self._verbose >= 2:
  115. iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout)
  116. else:
  117. iterator = g_list
  118. for G in iterator:
  119. # don't normalize adjacency matrices if q is a uniform vector. Note
  120. # A actually is the transpose of the adjacency matrix.
  121. A = nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
  122. ew, ev = np.linalg.eig(A)
  123. D_list.append(ew)
  124. P_list.append(ev)
  125. if self._p is None: # p is uniform distribution as default.
  126. q_T1 = 1 / nx.number_of_nodes(g1)
  127. q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list]
  128. if self._verbose >= 2:
  129. iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
  130. else:
  131. iterator = range(len(g_list))
  132. for i in iterator:
  133. kernel = self._kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel)
  134. kernel_list[i] = kernel
  135. else: # @todo
  136. pass
  137. else: # @todo
  138. pass
  139. return kernel_list
  140. def _compute_kernel_list_imap_unordered(self, g1, g_list):
  141. self._check_edge_weight(g_list + [g1], self._verbose)
  142. self._check_graphs(g_list + [g1])
  143. if self._verbose >= 2:
  144. import warnings
  145. warnings.warn('All labels are ignored. Only works for undirected graphs.')
  146. # compute kernel list.
  147. kernel_list = [None] * len(g_list)
  148. if self._q is None:
  149. # precompute the spectral decomposition of each graph.
  150. A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
  151. D1, P1 = np.linalg.eig(A1)
  152. P_list = []
  153. D_list = []
  154. if self._verbose >= 2:
  155. iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout)
  156. else:
  157. iterator = g_list
  158. for G in iterator:
  159. # don't normalize adjacency matrices if q is a uniform vector. Note
  160. # A actually is the transpose of the adjacency matrix.
  161. A = nx.adjacency_matrix(G, self._edge_weight).todense().transpose()
  162. ew, ev = np.linalg.eig(A)
  163. D_list.append(ew)
  164. P_list.append(ev) # @todo: parallel?
  165. if self._p is None: # p is uniform distribution as default.
  166. q_T1 = 1 / nx.number_of_nodes(g1)
  167. q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] # @todo: parallel?
  168. def init_worker(q_T1_toshare, P1_toshare, D1_toshare, q_T_list_toshare, P_list_toshare, D_list_toshare):
  169. global G_q_T1, G_P1, G_D1, G_q_T_list, G_P_list, G_D_list
  170. G_q_T1 = q_T1_toshare
  171. G_P1 = P1_toshare
  172. G_D1 = D1_toshare
  173. G_q_T_list = q_T_list_toshare
  174. G_P_list = P_list_toshare
  175. G_D_list = D_list_toshare
  176. do_fun = self._wrapper_kernel_list_do
  177. def func_assign(result, var_to_assign):
  178. var_to_assign[result[0]] = result[1]
  179. itr = range(len(g_list))
  180. len_itr = len(g_list)
  181. parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
  182. init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
  183. else: # @todo
  184. pass
  185. else: # @todo
  186. pass
  187. return kernel_list
  188. def _wrapper_kernel_list_do(self, itr):
  189. return itr, self._kernel_do(G_q_T1, G_q_T_list[itr], G_P1, G_P_list[itr], G_D1, G_D_list[itr], self._weight, self._sub_kernel)
  190. def _compute_single_kernel_series(self, g1, g2):
  191. self._check_edge_weight([g1] + [g2], self._verbose)
  192. self._check_graphs([g1] + [g2])
  193. if self._verbose >= 2:
  194. import warnings
  195. warnings.warn('All labels are ignored. Only works for undirected graphs.')
  196. if self._q is None:
  197. # precompute the spectral decomposition of each graph.
  198. A1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
  199. D1, P1 = np.linalg.eig(A1)
  200. A2 = nx.adjacency_matrix(g2, self._edge_weight).todense().transpose()
  201. D2, P2 = np.linalg.eig(A2)
  202. if self._p is None: # p is uniform distribution as default.
  203. q_T1 = 1 / nx.number_of_nodes(g1)
  204. q_T2 = 1 / nx.number_of_nodes(g2)
  205. kernel = self._kernel_do(q_T1, q_T2, P1, P2, D1, D2, self._weight, self._sub_kernel)
  206. else: # @todo
  207. pass
  208. else: # @todo
  209. pass
  210. return kernel
  211. def _kernel_do(self, q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
  212. # use uniform distribution if there is no prior knowledge.
  213. kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense()
  214. # @todo: this is not needed when p = q (kr = kl.T) for undirected graphs.
  215. # kr = kron(np.dot(P_inv_list[i], q_list[i]), np.dot(P_inv_list[j], q_list[j])).todense()
  216. if sub_kernel == 'exp':
  217. D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
  218. kmiddle = np.diag(np.exp(weight * D_diag))
  219. elif sub_kernel == 'geo':
  220. D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
  221. kmiddle = np.diag(weight * D_diag)
  222. kmiddle = np.identity(len(kmiddle)) - weight * kmiddle
  223. kmiddle = np.linalg.inv(kmiddle)
  224. return np.dot(np.dot(kl, kmiddle), kl.T)[0, 0]
  225. def _wrapper_kernel_do(self, itr):
  226. i = itr[0]
  227. j = itr[1]
  228. return i, j, self._kernel_do(G_q_T_list[i], G_q_T_list[j], G_P_list[i], G_P_list[j], G_D_list[i], G_D_list[j], self._weight, self._sub_kernel)

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.