You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

conjugate_gradient.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 20 16:09:51 2020
@author: ljia
@references:
	[1] S. Vichy N. Vishwanathan, Nicol N. Schraudolph, Risi Kondor, and
	Karsten M. Borgwardt. Graph kernels. Journal of Machine Learning
	Research, 11(Apr):1201-1242, 2010.
"""
import sys

import networkx as nx
import numpy as np
from scipy.sparse import identity
from scipy.sparse.linalg import cg

from gklearn.kernels import RandomWalkMeta
from gklearn.utils import get_iters
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import compute_vertex_kernels
  18. class ConjugateGradient(RandomWalkMeta):
  19. def __init__(self, **kwargs):
  20. super().__init__(**kwargs)
  21. self._node_kernels = kwargs.get('node_kernels', None)
  22. self._edge_kernels = kwargs.get('edge_kernels', None)
  23. self._node_labels = kwargs.get('node_labels', [])
  24. self._edge_labels = kwargs.get('edge_labels', [])
  25. self._node_attrs = kwargs.get('node_attrs', [])
  26. self._edge_attrs = kwargs.get('edge_attrs', [])
  27. def _compute_gm_series(self):
  28. self._check_edge_weight(self._graphs, self._verbose)
  29. self._check_graphs(self._graphs)
  30. lmda = self._weight
  31. # Compute Gram matrix.
  32. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  33. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  34. iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
  35. self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  36. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  37. from itertools import combinations_with_replacement
  38. itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
  39. len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
  40. iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))
  41. for i, j in iterator:
  42. kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
  43. gram_matrix[i][j] = kernel
  44. gram_matrix[j][i] = kernel
  45. else: # @todo
  46. pass
  47. return gram_matrix
  48. def _compute_gm_imap_unordered(self):
  49. self._check_edge_weight(self._graphs, self._verbose)
  50. self._check_graphs(self._graphs)
  51. # Compute Gram matrix.
  52. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  53. # @todo: parallel this.
  54. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  55. iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
  56. self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  57. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  58. def init_worker(gn_toshare):
  59. global G_gn
  60. G_gn = gn_toshare
  61. do_fun = self._wrapper_kernel_do
  62. parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
  63. glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
  64. else: # @todo
  65. pass
  66. return gram_matrix
  67. def _compute_kernel_list_series(self, g1, g_list):
  68. self._check_edge_weight(g_list + [g1], self._verbose)
  69. self._check_graphs(g_list + [g1])
  70. lmda = self._weight
  71. # compute kernel list.
  72. kernel_list = [None] * len(g_list)
  73. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  74. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  75. iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
  76. g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  77. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  78. iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
  79. for i in iterator:
  80. kernel = self._kernel_do(g1, g_list[i], lmda)
  81. kernel_list[i] = kernel
  82. else: # @todo
  83. pass
  84. return kernel_list
  85. def _compute_kernel_list_imap_unordered(self, g1, g_list):
  86. self._check_edge_weight(g_list + [g1], self._verbose)
  87. self._check_graphs(g_list + [g1])
  88. # compute kernel list.
  89. kernel_list = [None] * len(g_list)
  90. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  91. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  92. # @todo: parallel this.
  93. iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
  94. g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  95. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  96. def init_worker(g1_toshare, g_list_toshare):
  97. global G_g1, G_g_list
  98. G_g1 = g1_toshare
  99. G_g_list = g_list_toshare
  100. do_fun = self._wrapper_kernel_list_do
  101. def func_assign(result, var_to_assign):
  102. var_to_assign[result[0]] = result[1]
  103. itr = range(len(g_list))
  104. len_itr = len(g_list)
  105. parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
  106. init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
  107. n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
  108. else: # @todo
  109. pass
  110. return kernel_list
  111. def _wrapper_kernel_list_do(self, itr):
  112. return itr, self._kernel_do(G_g1, G_g_list[itr], self._weight)
  113. def _compute_single_kernel_series(self, g1, g2):
  114. self._check_edge_weight([g1] + [g2], self._verbose)
  115. self._check_graphs([g1] + [g2])
  116. lmda = self._weight
  117. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  118. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  119. g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal')
  120. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  121. kernel = self._kernel_do(g1, g2, lmda)
  122. else: # @todo
  123. pass
  124. return kernel
  125. def _kernel_do(self, g1, g2, lmda):
  126. # Frist, compute kernels between all pairs of nodes using the method borrowed
  127. # from FCSP. It is faster than directly computing all edge kernels
  128. # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the
  129. # graphs compared, which is the most case we went though. For very
  130. # sparse graphs, this would be slow.
  131. vk_dict = self._compute_vertex_kernels(g1, g2)
  132. # Compute the weight matrix of the direct product graph.
  133. w_times, w_dim = self._compute_weight_matrix(g1, g2, vk_dict)
  134. # use uniform distribution if there is no prior knowledge.
  135. p_times_uni = 1 / w_dim
  136. A = identity(w_times.shape[0]) - w_times * lmda
  137. b = np.full((w_dim, 1), p_times_uni)
  138. x, _ = cg(A, b)
  139. # use uniform distribution if there is no prior knowledge.
  140. q_times = np.full((1, w_dim), p_times_uni)
  141. return np.dot(q_times, x)
  142. def _wrapper_kernel_do(self, itr):
  143. i = itr[0]
  144. j = itr[1]
  145. return i, j, self._kernel_do(G_gn[i], G_gn[j], self._weight)
  146. def _func_fp(x, p_times, lmda, w_times):
  147. haha = w_times * x
  148. haha = lmda * haha
  149. haha = p_times + haha
  150. return p_times + lmda * np.dot(w_times, x)
  151. def _compute_vertex_kernels(self, g1, g2):
  152. """Compute vertex kernels between vertices of two graphs.
  153. """
  154. return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)
  155. # @todo: move if out to make it faster.
  156. # @todo: node/edge kernels use direct function rather than dicts.
  157. def _compute_weight_matrix(self, g1, g2, vk_dict):
  158. """Compute the weight matrix of the direct product graph.
  159. """
  160. # Define edge kernels.
  161. def compute_ek_11(e1, e2, ke):
  162. e1_labels = [e1[2][el] for el in self._edge_labels]
  163. e2_labels = [e2[2][el] for el in self._edge_labels]
  164. e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
  165. e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
  166. return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
  167. def compute_ek_10(e1, e2, ke):
  168. e1_labels = [e1[2][el] for el in self._edge_labels]
  169. e2_labels = [e2[2][el] for el in self._edge_labels]
  170. return ke(e1_labels, e2_labels)
  171. def compute_ek_01(e1, e2, ke):
  172. e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
  173. e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
  174. return ke(e1_attrs, e2_attrs)
  175. def compute_ek_00(e1, e2, ke):
  176. return 1
  177. # Select the proper edge kernel.
  178. if len(self._edge_labels) > 0:
  179. # edge symb and non-synb labeled
  180. if len(self._edge_attrs) > 0:
  181. ke = self._edge_kernels['mix']
  182. ek_temp = compute_ek_11
  183. # edge symb labeled
  184. else:
  185. ke = self._edge_kernels['symb']
  186. ek_temp = compute_ek_10
  187. else:
  188. # edge non-synb labeled
  189. if len(self._edge_attrs) > 0:
  190. ke = self._edge_kernels['nsymb']
  191. ek_temp = compute_ek_01
  192. # edge unlabeled
  193. else:
  194. ke = None
  195. ek_temp = compute_ek_00 # @todo: check how much slower is this.
  196. # Compute the weight matrix.
  197. w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
  198. w_times = np.zeros((w_dim, w_dim))
  199. if vk_dict: # node labeled
  200. if self._ds_infos['directed']:
  201. for e1 in g1.edges(data=True):
  202. for e2 in g2.edges(data=True):
  203. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  204. w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])]
  205. else: # undirected
  206. for e1 in g1.edges(data=True):
  207. for e2 in g2.edges(data=True):
  208. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  209. w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])] + vk_dict[(e1[0], e2[1])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[0])]
  210. w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
  211. w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
  212. w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
  213. w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
  214. else: # node unlabeled
  215. if self._ds_infos['directed']:
  216. for e1 in g1.edges(data=True):
  217. for e2 in g2.edges(data=True):
  218. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  219. w_times[w_idx] = ek_temp(e1, e2, ke)
  220. else: # undirected
  221. for e1 in g1.edges(data=True):
  222. for e2 in g2.edges(data=True):
  223. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  224. w_times[w_idx] = ek_temp(e1, e2, ke)
  225. w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
  226. w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
  227. w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
  228. w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
  229. return w_times, w_dim

A Python package for graph kernels, graph edit distances and graph pre-image problem.