You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

fixed_point.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Aug 20 16:09:51 2020
  5. @author: ljia
  6. @references:
  7. [1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
  8. """
  9. import sys
  10. from gklearn.utils import get_iters
  11. import numpy as np
  12. import networkx as nx
  13. from scipy import optimize
  14. from gklearn.utils.parallel import parallel_gm, parallel_me
  15. from gklearn.kernels import RandomWalkMeta
  16. from gklearn.utils.utils import compute_vertex_kernels
  17. class FixedPoint(RandomWalkMeta):
  18. def __init__(self, **kwargs):
  19. super().__init__(**kwargs)
  20. self._node_kernels = kwargs.get('node_kernels', None)
  21. self._edge_kernels = kwargs.get('edge_kernels', None)
  22. self._node_labels = kwargs.get('node_labels', [])
  23. self._edge_labels = kwargs.get('edge_labels', [])
  24. self._node_attrs = kwargs.get('node_attrs', [])
  25. self._edge_attrs = kwargs.get('edge_attrs', [])
  26. def _compute_gm_series(self):
  27. self._check_edge_weight(self._graphs, self.verbose)
  28. self._check_graphs(self._graphs)
  29. lmda = self._weight
  30. # Compute Gram matrix.
  31. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  32. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  33. iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout,verbose=(self.verbose >= 2))
  34. self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  35. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  36. from itertools import combinations_with_replacement
  37. itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
  38. len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
  39. iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))
  40. for i, j in iterator:
  41. kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
  42. gram_matrix[i][j] = kernel
  43. gram_matrix[j][i] = kernel
  44. else: # @todo
  45. pass
  46. return gram_matrix
  47. def _compute_gm_imap_unordered(self):
  48. self._check_edge_weight(self._graphs, self.verbose)
  49. self._check_graphs(self._graphs)
  50. # Compute Gram matrix.
  51. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  52. # @todo: parallel this.
  53. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  54. iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
  55. self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  56. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  57. def init_worker(gn_toshare):
  58. global G_gn
  59. G_gn = gn_toshare
  60. do_fun = self._wrapper_kernel_do
  61. parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
  62. glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
  63. else: # @todo
  64. pass
  65. return gram_matrix
  66. def _compute_kernel_list_series(self, g1, g_list):
  67. self._check_edge_weight(g_list + [g1], self.verbose)
  68. self._check_graphs(g_list + [g1])
  69. lmda = self._weight
  70. # compute kernel list.
  71. kernel_list = [None] * len(g_list)
  72. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  73. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  74. iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
  75. g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  76. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  77. iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
  78. for i in iterator:
  79. kernel = self._kernel_do(g1, g_list[i], lmda)
  80. kernel_list[i] = kernel
  81. else: # @todo
  82. pass
  83. return kernel_list
  84. def _compute_kernel_list_imap_unordered(self, g1, g_list):
  85. self._check_edge_weight(g_list + [g1], self.verbose)
  86. self._check_graphs(g_list + [g1])
  87. # compute kernel list.
  88. kernel_list = [None] * len(g_list)
  89. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  90. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  91. # @todo: parallel this.
  92. iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
  93. g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  94. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  95. def init_worker(g1_toshare, g_list_toshare):
  96. global G_g1, G_g_list
  97. G_g1 = g1_toshare
  98. G_g_list = g_list_toshare
  99. do_fun = self._wrapper_kernel_list_do
  100. def func_assign(result, var_to_assign):
  101. var_to_assign[result[0]] = result[1]
  102. itr = range(len(g_list))
  103. len_itr = len(g_list)
  104. parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
  105. init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
  106. n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
  107. else: # @todo
  108. pass
  109. return kernel_list
  110. def _wrapper_kernel_list_do(self, itr):
  111. return itr, self._kernel_do(G_g1, G_g_list[itr], self._weight)
  112. def _compute_single_kernel_series(self, g1, g2):
  113. self._check_edge_weight([g1] + [g2], self.verbose)
  114. self._check_graphs([g1] + [g2])
  115. lmda = self._weight
  116. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  117. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  118. g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal')
  119. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  120. kernel = self._kernel_do(g1, g2, lmda)
  121. else: # @todo
  122. pass
  123. return kernel
  124. def _kernel_do(self, g1, g2, lmda):
  125. # Frist, compute kernels between all pairs of nodes using the method borrowed
  126. # from FCSP. It is faster than directly computing all edge kernels
  127. # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the
  128. # graphs compared, which is the most case we went though. For very
  129. # sparse graphs, this would be slow.
  130. vk_dict = self._compute_vertex_kernels(g1, g2)
  131. # Compute the weight matrix of the direct product graph.
  132. w_times, w_dim = self._compute_weight_matrix(g1, g2, vk_dict)
  133. # use uniform distribution if there is no prior knowledge.
  134. p_times_uni = 1 / w_dim
  135. p_times = np.full((w_dim, 1), p_times_uni)
  136. x = optimize.fixed_point(self._func_fp, p_times, args=(p_times, lmda, w_times), xtol=1e-06, maxiter=1000)
  137. # use uniform distribution if there is no prior knowledge.
  138. q_times = np.full((1, w_dim), p_times_uni)
  139. return np.dot(q_times, x)
  140. def _wrapper_kernel_do(self, itr):
  141. i = itr[0]
  142. j = itr[1]
  143. return i, j, self._kernel_do(G_gn[i], G_gn[j], self._weight)
  144. def _func_fp(self, x, p_times, lmda, w_times):
  145. haha = w_times * x
  146. haha = lmda * haha
  147. haha = p_times + haha
  148. return p_times + lmda * np.dot(w_times, x)
  149. def _compute_vertex_kernels(self, g1, g2):
  150. """Compute vertex kernels between vertices of two graphs.
  151. """
  152. return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)
  153. # @todo: move if out to make it faster.
  154. # @todo: node/edge kernels use direct function rather than dicts.
  155. def _compute_weight_matrix(self, g1, g2, vk_dict):
  156. """Compute the weight matrix of the direct product graph.
  157. """
  158. # Define edge kernels.
  159. def compute_ek_11(e1, e2, ke):
  160. e1_labels = [e1[2][el] for el in self._edge_labels]
  161. e2_labels = [e2[2][el] for el in self._edge_labels]
  162. e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
  163. e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
  164. return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
  165. def compute_ek_10(e1, e2, ke):
  166. e1_labels = [e1[2][el] for el in self._edge_labels]
  167. e2_labels = [e2[2][el] for el in self._edge_labels]
  168. return ke(e1_labels, e2_labels)
  169. def compute_ek_01(e1, e2, ke):
  170. e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
  171. e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
  172. return ke(e1_attrs, e2_attrs)
  173. def compute_ek_00(e1, e2, ke):
  174. return 1
  175. # Select the proper edge kernel.
  176. if len(self._edge_labels) > 0:
  177. # edge symb and non-synb labeled
  178. if len(self._edge_attrs) > 0:
  179. ke = self._edge_kernels['mix']
  180. ek_temp = compute_ek_11
  181. # edge symb labeled
  182. else:
  183. ke = self._edge_kernels['symb']
  184. ek_temp = compute_ek_10
  185. else:
  186. # edge non-synb labeled
  187. if len(self._edge_attrs) > 0:
  188. ke = self._edge_kernels['nsymb']
  189. ek_temp = compute_ek_01
  190. # edge unlabeled
  191. else:
  192. ke = None
  193. ek_temp = compute_ek_00 # @todo: check how much slower is this.
  194. # Compute the weight matrix.
  195. w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
  196. w_times = np.zeros((w_dim, w_dim))
  197. if vk_dict: # node labeled
  198. if self._ds_infos['directed']:
  199. for e1 in g1.edges(data=True):
  200. for e2 in g2.edges(data=True):
  201. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  202. w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])]
  203. else: # undirected
  204. for e1 in g1.edges(data=True):
  205. for e2 in g2.edges(data=True):
  206. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  207. w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])] + vk_dict[(e1[0], e2[1])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[0])]
  208. w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
  209. w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
  210. w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
  211. w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
  212. else: # node unlabeled
  213. if self._ds_infos['directed']:
  214. for e1 in g1.edges(data=True):
  215. for e2 in g2.edges(data=True):
  216. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  217. w_times[w_idx] = ek_temp(e1, e2, ke)
  218. else: # undirected
  219. for e1 in g1.edges(data=True):
  220. for e2 in g2.edges(data=True):
  221. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  222. w_times[w_idx] = ek_temp(e1, e2, ke)
  223. w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
  224. w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
  225. w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
  226. w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
  227. return w_times, w_dim

A Python package for graph kernels, graph edit distances and graph pre-image problem.