You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

fixed_point.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Aug 20 16:09:51 2020
  5. @author: ljia
  6. @references:
  7. [1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
  8. """
  9. import sys
  10. from tqdm import tqdm
  11. import numpy as np
  12. import networkx as nx
  13. from scipy import optimize
  14. from gklearn.utils.parallel import parallel_gm, parallel_me
  15. from gklearn.kernels import RandomWalkMeta
  16. from gklearn.utils.utils import compute_vertex_kernels
  17. class FixedPoint(RandomWalkMeta):
  18. def __init__(self, **kwargs):
  19. super().__init__(**kwargs)
  20. self._node_kernels = kwargs.get('node_kernels', None)
  21. self._edge_kernels = kwargs.get('edge_kernels', None)
  22. self._node_labels = kwargs.get('node_labels', [])
  23. self._edge_labels = kwargs.get('edge_labels', [])
  24. self._node_attrs = kwargs.get('node_attrs', [])
  25. self._edge_attrs = kwargs.get('edge_attrs', [])
  26. def _compute_gm_series(self):
  27. self._check_edge_weight(self._graphs, self._verbose)
  28. self._check_graphs(self._graphs)
  29. lmda = self._weight
  30. # Compute Gram matrix.
  31. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  32. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  33. if self._verbose >= 2:
  34. iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
  35. else:
  36. iterator = self._graphs
  37. self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  38. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  39. from itertools import combinations_with_replacement
  40. itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
  41. if self._verbose >= 2:
  42. iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
  43. else:
  44. iterator = itr
  45. for i, j in iterator:
  46. kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
  47. gram_matrix[i][j] = kernel
  48. gram_matrix[j][i] = kernel
  49. else: # @todo
  50. pass
  51. return gram_matrix
  52. def _compute_gm_imap_unordered(self):
  53. self._check_edge_weight(self._graphs, self._verbose)
  54. self._check_graphs(self._graphs)
  55. # Compute Gram matrix.
  56. gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
  57. # @todo: parallel this.
  58. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  59. if self._verbose >= 2:
  60. iterator = tqdm(self._graphs, desc='Reindex vertices', file=sys.stdout)
  61. else:
  62. iterator = self._graphs
  63. self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  64. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  65. def init_worker(gn_toshare):
  66. global G_gn
  67. G_gn = gn_toshare
  68. do_fun = self._wrapper_kernel_do
  69. parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
  70. glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
  71. else: # @todo
  72. pass
  73. return gram_matrix
  74. def _compute_kernel_list_series(self, g1, g_list):
  75. self._check_edge_weight(g_list + [g1], self._verbose)
  76. self._check_graphs(g_list + [g1])
  77. lmda = self._weight
  78. # compute kernel list.
  79. kernel_list = [None] * len(g_list)
  80. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  81. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  82. if self._verbose >= 2:
  83. iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
  84. else:
  85. iterator = g_list
  86. g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  87. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  88. if self._verbose >= 2:
  89. iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
  90. else:
  91. iterator = range(len(g_list))
  92. for i in iterator:
  93. kernel = self._kernel_do(g1, g_list[i], lmda)
  94. kernel_list[i] = kernel
  95. else: # @todo
  96. pass
  97. return kernel_list
  98. def _compute_kernel_list_imap_unordered(self, g1, g_list):
  99. self._check_edge_weight(g_list + [g1], self._verbose)
  100. self._check_graphs(g_list + [g1])
  101. # compute kernel list.
  102. kernel_list = [None] * len(g_list)
  103. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  104. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  105. # @todo: parallel this.
  106. if self._verbose >= 2:
  107. iterator = tqdm(g_list, desc='Reindex vertices', file=sys.stdout)
  108. else:
  109. iterator = g_list
  110. g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
  111. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  112. def init_worker(g1_toshare, g_list_toshare):
  113. global G_g1, G_g_list
  114. G_g1 = g1_toshare
  115. G_g_list = g_list_toshare
  116. do_fun = self._wrapper_kernel_list_do
  117. def func_assign(result, var_to_assign):
  118. var_to_assign[result[0]] = result[1]
  119. itr = range(len(g_list))
  120. len_itr = len(g_list)
  121. parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
  122. init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
  123. n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
  124. else: # @todo
  125. pass
  126. return kernel_list
  127. def _wrapper_kernel_list_do(self, itr):
  128. return itr, self._kernel_do(G_g1, G_g_list[itr], self._weight)
  129. def _compute_single_kernel_series(self, g1, g2):
  130. self._check_edge_weight([g1] + [g2], self._verbose)
  131. self._check_graphs([g1] + [g2])
  132. lmda = self._weight
  133. # Reindex nodes using consecutive integers for the convenience of kernel computation.
  134. g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
  135. g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal')
  136. if self._p is None and self._q is None: # p and q are uniform distributions as default.
  137. kernel = self._kernel_do(g1, g2, lmda)
  138. else: # @todo
  139. pass
  140. return kernel
  141. def _kernel_do(self, g1, g2, lmda):
  142. # Frist, compute kernels between all pairs of nodes using the method borrowed
  143. # from FCSP. It is faster than directly computing all edge kernels
  144. # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the
  145. # graphs compared, which is the most case we went though. For very
  146. # sparse graphs, this would be slow.
  147. vk_dict = self._compute_vertex_kernels(g1, g2)
  148. # Compute the weight matrix of the direct product graph.
  149. w_times, w_dim = self._compute_weight_matrix(g1, g2, vk_dict)
  150. # use uniform distribution if there is no prior knowledge.
  151. p_times_uni = 1 / w_dim
  152. p_times = np.full((w_dim, 1), p_times_uni)
  153. x = optimize.fixed_point(self._func_fp, p_times, args=(p_times, lmda, w_times), xtol=1e-06, maxiter=1000)
  154. # use uniform distribution if there is no prior knowledge.
  155. q_times = np.full((1, w_dim), p_times_uni)
  156. return np.dot(q_times, x)
  157. def _wrapper_kernel_do(self, itr):
  158. i = itr[0]
  159. j = itr[1]
  160. return i, j, self._kernel_do(G_gn[i], G_gn[j], self._weight)
  161. def _func_fp(self, x, p_times, lmda, w_times):
  162. haha = w_times * x
  163. haha = lmda * haha
  164. haha = p_times + haha
  165. return p_times + lmda * np.dot(w_times, x)
  166. def _compute_vertex_kernels(self, g1, g2):
  167. """Compute vertex kernels between vertices of two graphs.
  168. """
  169. return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)
  170. # @todo: move if out to make it faster.
  171. # @todo: node/edge kernels use direct function rather than dicts.
  172. def _compute_weight_matrix(self, g1, g2, vk_dict):
  173. """Compute the weight matrix of the direct product graph.
  174. """
  175. # Define edge kernels.
  176. def compute_ek_11(e1, e2, ke):
  177. e1_labels = [e1[2][el] for el in self._edge_labels]
  178. e2_labels = [e2[2][el] for el in self._edge_labels]
  179. e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
  180. e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
  181. return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
  182. def compute_ek_10(e1, e2, ke):
  183. e1_labels = [e1[2][el] for el in self._edge_labels]
  184. e2_labels = [e2[2][el] for el in self._edge_labels]
  185. return ke(e1_labels, e2_labels)
  186. def compute_ek_01(e1, e2, ke):
  187. e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
  188. e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
  189. return ke(e1_attrs, e2_attrs)
  190. def compute_ek_00(e1, e2, ke):
  191. return 1
  192. # Select the proper edge kernel.
  193. if len(self._edge_labels) > 0:
  194. # edge symb and non-synb labeled
  195. if len(self._edge_attrs) > 0:
  196. ke = self._edge_kernels['mix']
  197. ek_temp = compute_ek_11
  198. # edge symb labeled
  199. else:
  200. ke = self._edge_kernels['symb']
  201. ek_temp = compute_ek_10
  202. else:
  203. # edge non-synb labeled
  204. if len(self._edge_attrs) > 0:
  205. ke = self._edge_kernels['nsymb']
  206. ek_temp = compute_ek_01
  207. # edge unlabeled
  208. else:
  209. ke = None
  210. ek_temp = compute_ek_00 # @todo: check how much slower is this.
  211. # Compute the weight matrix.
  212. w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
  213. w_times = np.zeros((w_dim, w_dim))
  214. if vk_dict: # node labeled
  215. if self._ds_infos['directed']:
  216. for e1 in g1.edges(data=True):
  217. for e2 in g2.edges(data=True):
  218. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  219. w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])]
  220. else: # undirected
  221. for e1 in g1.edges(data=True):
  222. for e2 in g2.edges(data=True):
  223. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  224. w_times[w_idx] = vk_dict[(e1[0], e2[0])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[1])] + vk_dict[(e1[0], e2[1])] * ek_temp(e1, e2, ke) * vk_dict[(e1[1], e2[0])]
  225. w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
  226. w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
  227. w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
  228. w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
  229. else: # node unlabeled
  230. if self._ds_infos['directed']:
  231. for e1 in g1.edges(data=True):
  232. for e2 in g2.edges(data=True):
  233. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  234. w_times[w_idx] = ek_temp(e1, e2, ke)
  235. else: # undirected
  236. for e1 in g1.edges(data=True):
  237. for e2 in g2.edges(data=True):
  238. w_idx = (e1[0] * nx.number_of_nodes(g2) + e2[0], e1[1] * nx.number_of_nodes(g2) + e2[1])
  239. w_times[w_idx] = ek_temp(e1, e2, ke)
  240. w_times[w_idx[1], w_idx[0]] = w_times[w_idx[0], w_idx[1]]
  241. w_idx2 = (e1[0] * nx.number_of_nodes(g2) + e2[1], e1[1] * nx.number_of_nodes(g2) + e2[0])
  242. w_times[w_idx2[0], w_idx2[1]] = w_times[w_idx[0], w_idx[1]]
  243. w_times[w_idx2[1], w_idx2[0]] = w_times[w_idx[0], w_idx[1]]
  244. return w_times, w_dim

A Python package for graph kernels, graph edit distances and graph pre-image problem.