You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

conjugate_gradient.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Aug 20 16:09:51 2020
  5. @author: ljia
  6. @references:
  7. [1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
  8. """
  9. import sys
  10. from tqdm import tqdm
  11. import numpy as np
  12. import networkx as nx
  13. from scipy.sparse import identity
  14. from scipy.sparse.linalg import cg
  15. from gklearn.utils.parallel import parallel_gm, parallel_me
  16. from gklearn.kernels import RandomWalkMeta
  17. from gklearn.utils.utils import compute_vertex_kernels
  18. class ConjugateGradient(RandomWalkMeta):
  19. def __init__(self, **kwargs):
  20. super().__init__(**kwargs)
  21. self._node_kernels = kwargs.get('node_kernels', None)
  22. self._edge_kernels = kwargs.get('edge_kernels', None)
  23. self._node_labels = kwargs.get('node_labels', [])
  24. self._edge_labels = kwargs.get('edge_labels', [])
  25. self._node_attrs = kwargs.get('node_attrs', [])
  26. self._edge_attrs = kwargs.get('edge_attrs', [])
  27. def _compute_gm_series(self):
  28. self._check_edge_weight(self._graphs, self._verbose)
  29. self._check_graphs(self._graphs)
  30. lmda = self._weight
  31. # Compute Gram matrix.
  32. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  33. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  34. if self._verbose >= 2:
  35. iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
  36. else:
  37. iterator = self._graphs
  38. self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  39. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  40. from itertools import combinations_with_replacement
  41. itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
  42. if self._verbose >= 2:
  43. iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
  44. else:
  45. iterator = itr
  46. for i, j in iterator:
  47. kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
  48. gram_matrix[i][j] = kernel
  49. gram_matrix[j][i] = kernel
  50. else: # @todo
  51. pass
  52. return gram_matrix
  53. def _compute_gm_imap_unordered(self):
  54. self._check_edge_weight(self._graphs, self._verbose)
  55. self._check_graphs(self._graphs)
  56. # Compute Gram matrix.
  57. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  58. # @todo: parallel this.
  59. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  60. if self._verbose >= 2:
  61. iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
  62. else:
  63. iterator = self._graphs
  64. self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  65. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  66. def init_worker(gn_toshare):
  67. global G_gn
  68. G_gn = gn_toshare
  69. do_fun = self._wrapper_kernel_do
  70. parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
  71. glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
  72. else: # @todo
  73. pass
  74. return gram_matrix
  75. def _compute_kernel_list_series(self, g1, g_list):
  76. self._check_edge_weight(g_list + [g1], self._verbose)
  77. self._check_graphs(g_list + [g1])
  78. lmda = self._weight
  79. # compute kernel list.
  80. kernel_list = [None] * len(g_list)
  81. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  82. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  83. if self._verbose >= 2:
  84. iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
  85. else:
  86. iterator = g_list
  87. g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  88. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  89. if self._verbose >= 2:
  90. iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
  91. else:
  92. iterator = range(len(g_list))
  93. for i in iterator:
  94. kernel = self._kernel_do(g1, g_list[i], lmda)
  95. kernel_list[i] = kernel
  96. else: # @todo
  97. pass
  98. return kernel_list
  99. def _compute_kernel_list_imap_unordered(self, g1, g_list):
  100. self._check_edge_weight(g_list + [g1], self._verbose)
  101. self._check_graphs(g_list + [g1])
  102. # compute kernel list.
  103. kernel_list = [None] * len(g_list)
  104. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  105. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  106. # @todo: parallel this.
  107. if self._verbose >= 2:
  108. iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
  109. else:
  110. iterator = g_list
  111. g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  112. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  113. def init_worker(g1_toshare, g_list_toshare):
  114. global G_g1, G_g_list
  115. G_g1 = g1_toshare
  116. G_g_list = g_list_toshare
  117. do_fun = self._wrapper_kernel_list_do
  118. def func_assign(result, var_to_assign):
  119. var_to_assign[result[0]] = result[1]
  120. itr = range(len(g_list))
  121. len_itr = len(g_list)
  122. parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
  123. init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
  124. n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
  125. else: # @todo
  126. pass
  127. return kernel_list
  128. def _wrapper_kernel_list_do(self, itr):
  129. return itr, self._kernel_do(G_g1, G_g_list[itr], self._weight)
  130. def _compute_single_kernel_series(self, g1, g2):
  131. self._check_edge_weight([g1] + [g2], self._verbose)
  132. self._check_graphs([g1] + [g2])
  133. lmda = self._weight
  134. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  135. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  136. g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal')
  137. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  138. kernel = self._kernel_do(g1, g2, lmda)
  139. else: # @todo
  140. pass
  141. return kernel
  142. def _kernel_do(self, g1, g2, lmda):
  143. # Frist, compute kernels between all pairs of nodes using the method borrowed
  144. # from FCSP. It is faster than directly computing all edge kernels
  145. # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the
  146. # graphs compared, which is the most case we went though. For very
  147. # sparse graphs, this would be slow.
  148. vk_dict = self._compute_vertex_kernels(g1, g2)
  149. # Compute the weight matrix of the direct product graph.
  150. w_times, w_dim = self._compute_weight_matrix(g1, g2, vk_dict)
  151. # use uniform distribution if there is no prior knowledge.
  152. p_times_uni = 1 / w_dim
  153. A = identity(w_times.shape[0]) - w_times * lmda
  154. b = np.full((w_dim, 1), p_times_uni)
  155. x, _ = cg(A, b)
  156. # use uniform distribution if there is no prior knowledge.
  157. q_times = np.full((1, w_dim), p_times_uni)
  158. return np.dot(q_times, x)
  159. def _wrapper_kernel_do(self, itr):
  160. i = itr[0]
  161. j = itr[1]
  162. return i, j, self._kernel_do(G_gn[i], G_gn[j], self._weight)
  163. def _func_fp(x, p_times, lmda, w_times):
  164. haha = w_times * x
  165. haha = lmda * haha
  166. haha = p_times + haha
  167. return p_times + lmda * np.dot(w_times, x)
  168. def _compute_vertex_kernels(self, g1, g2):
  169. """Compute vertex kernels between vertices of two graphs.
  170. """
  171. return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)
  172. # @todo: move if out to make it faster.
  173. # @todo: node/edge kernels use direct function rather than dicts.
  174. def _compute_weight_matrix(self, g1, g2, vk_dict):
  175. """Compute the weight matrix of the direct product graph.
  176. """
  177. # Define edge kernels.
  178. def compute_ek_11(e1, e2, ke):
  179. e1_labels = [e1[2][el] for el in self._edge_labels]
  180. e2_labels = [e2[2][el] for el in self._edge_labels]
  181. e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
  182. e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
  183. return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
  184. def compute_ek_10(e1, e2, ke):
  185. e1_labels = [e1[2][el] for el in self._edge_labels]
  186. e2_labels = [e2[2][el] for el in self._edge_labels]
  187. return ke(e1_labels, e2_labels)
  188. def compute_ek_01(e1, e2, ke):
  189. e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
  190. e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
  191. return ke(e1_attrs, e2_attrs)
  192. def compute_ek_00(e1, e2, ke):
  193. return 1
  194. # Select the proper edge kernel.
  195. if len(self._edge_labels) > 0:
  196. # edge symb and non-synb labeled
  197. if len(self._edge_attrs) > 0:
  198. ke = self._edge_kernels['mix']
  199. ek_temp = compute_ek_11
  200. # edge symb labeled
  201. else:
  202. ke = self._edge_kernels['symb']
  203. ek_temp = compute_ek_10
  204. else:
  205. # edge non-synb labeled
  206. if len(self._edge_attrs) > 0:
  207. ke = self._edge_kernels['nsymb']
  208. ek_temp = compute_ek_01
  209. # edge unlabeled
  210. else:
  211. ke = None
  212. ek_temp = compute_ek_00 # @todo: check how much slower is this.
  213. # Compute the weight matrix.
  214. w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
  215. w_times = np.zeros((w_dim, w_dim))
  216. if vk_dict: # node labeled
  217. if self._ds_infos['directed']:
  218. for e1 in g1.edges(data=True):
  219. for e2 in g2.edges(data=True):
  220. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  221. w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])]
  222. else: # undirected
  223. for e1 in g1.edges(data=True):
  224. for e2 in g2.edges(data=True):
  225. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  226. w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])] + vk_dict[(e1[0], e2[1])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[0])]
  227. w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
  228. w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
  229. w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
  230. w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
  231. else: # node unlabeled
  232. if self._ds_infos['directed']:
  233. for e1 in g1.edges(data=True):
  234. for e2 in g2.edges(data=True):
  235. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  236. w_times[w_idx] = ek_temp(e1, e2, ke)
  237. else: # undirected
  238. for e1 in g1.edges(data=True):
  239. for e2 in g2.edges(data=True):
  240. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  241. w_times[w_idx] = ek_temp(e1, e2, ke)
  242. w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
  243. w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
  244. w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
  245. w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
  246. return w_times, w_dim

A Python package for graph kernels, graph edit distances and graph pre-image problem.