You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

graph_kernel.py 5.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Mon Mar 30 11:52:47 2020
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import networkx as nx
  9. import multiprocessing
  10. import time
  11. class GraphKernel(object):
  12. def __init__(self):
  13. self._graphs = None
  14. self._parallel = ''
  15. self._n_jobs = 0
  16. self._verbose = None
  17. self._normalize = True
  18. self._run_time = 0
  19. self._gram_matrix = None
  20. self._gram_matrix_unnorm = None
  21. def compute(self, *graphs, **kwargs):
  22. self._parallel = kwargs.get('parallel', 'imap_unordered')
  23. self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
  24. self._normalize = kwargs.get('normalize', True)
  25. self._verbose = kwargs.get('verbose', 2)
  26. if len(graphs) == 1:
  27. if not isinstance(graphs[0], list):
  28. raise Exception('Cannot detect graphs.')
  29. elif len(graphs[0]) == 0:
  30. raise Exception('The graph list given is empty. No computation was performed.')
  31. else:
  32. self._graphs = [g.copy() for g in graphs[0]]
  33. self._gram_matrix = self.__compute_gram_matrix()
  34. self._gram_matrix_unnorm = np.copy(self._gram_matrix)
  35. if self._normalize:
  36. self._gram_matrix = self.normalize_gm(self._gram_matrix)
  37. return self._gram_matrix, self._run_time
  38. elif len(graphs) == 2:
  39. if self.is_graph(graphs[0]) and self.is_graph(graphs[1]):
  40. kernel = self.__compute_single_kernel(graphs[0].copy(), graphs[1].copy())
  41. return kernel, self._run_time
  42. elif self.is_graph(graphs[0]) and isinstance(graphs[1], list):
  43. g1 = graphs[0].copy()
  44. g_list = [g.copy() for g in graphs[1]]
  45. kernel_list = self.__compute_kernel_list(g1, g_list)
  46. return kernel_list, self._run_time
  47. elif isinstance(graphs[0], list) and self.is_graph(graphs[1]):
  48. g1 = graphs[1].copy()
  49. g_list = [g.copy() for g in graphs[0]]
  50. kernel_list = self.__compute_kernel_list(g1, g_list)
  51. return kernel_list, self._run_time
  52. else:
  53. raise Exception('Cannot detect graphs.')
  54. elif len(graphs) == 0 and self._graphs is None:
  55. raise Exception('Please add graphs before computing.')
  56. else:
  57. raise Exception('Cannot detect graphs.')
  58. def normalize_gm(self, gram_matrix):
  59. import warnings
  60. warnings.warn('gklearn.kernels.graph_kernel.normalize_gm will be deprecated, use gklearn.utils.normalize_gram_matrix instead', DeprecationWarning)
  61. diag = gram_matrix.diagonal().copy()
  62. for i in range(len(gram_matrix)):
  63. for j in range(i, len(gram_matrix)):
  64. gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
  65. gram_matrix[j][i] = gram_matrix[i][j]
  66. return gram_matrix
  67. def compute_distance_matrix(self):
  68. if self._gram_matrix is None:
  69. raise Exception('Please compute the Gram matrix before computing distance matrix.')
  70. dis_mat = np.empty((len(self._gram_matrix), len(self._gram_matrix)))
  71. for i in range(len(self._gram_matrix)):
  72. for j in range(i, len(self._gram_matrix)):
  73. dis = self._gram_matrix[i, i] + self._gram_matrix[j, j] - 2 * self._gram_matrix[i, j]
  74. if dis < 0:
  75. if dis > -1e-10:
  76. dis = 0
  77. else:
  78. raise ValueError('The distance is negative.')
  79. dis_mat[i, j] = np.sqrt(dis)
  80. dis_mat[j, i] = dis_mat[i, j]
  81. dis_max = np.max(np.max(dis_mat))
  82. dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
  83. dis_mean = np.mean(np.mean(dis_mat))
  84. return dis_mat, dis_max, dis_min, dis_mean
  85. def __compute_gram_matrix(self):
  86. start_time = time.time()
  87. if self._parallel == 'imap_unordered':
  88. gram_matrix = self._compute_gm_imap_unordered()
  89. elif self._parallel is None:
  90. gram_matrix = self._compute_gm_series()
  91. else:
  92. raise Exception('Parallel mode is not set correctly.')
  93. self._run_time = time.time() - start_time
  94. if self._verbose:
  95. print('Gram matrix of size %d built in %s seconds.'
  96. % (len(self._graphs), self._run_time))
  97. return gram_matrix
  98. def _compute_gm_series(self):
  99. pass
  100. def _compute_gm_imap_unordered(self):
  101. pass
  102. def __compute_kernel_list(self, g1, g_list):
  103. start_time = time.time()
  104. if self._parallel == 'imap_unordered':
  105. kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list)
  106. elif self._parallel is None:
  107. kernel_list = self._compute_kernel_list_series(g1, g_list)
  108. else:
  109. raise Exception('Parallel mode is not set correctly.')
  110. self._run_time = time.time() - start_time
  111. if self._verbose:
  112. print('Graph kernel bewteen a graph and a list of %d graphs built in %s seconds.'
  113. % (len(g_list), self._run_time))
  114. return kernel_list
  115. def _compute_kernel_list_series(self, g1, g_list):
  116. pass
  117. def _compute_kernel_list_imap_unordered(self, g1, g_list):
  118. pass
  119. def __compute_single_kernel(self, g1, g2):
  120. start_time = time.time()
  121. kernel = self._compute_single_kernel_series(g1, g2)
  122. self._run_time = time.time() - start_time
  123. if self._verbose:
  124. print('Graph kernel bewteen two graphs built in %s seconds.' % (self._run_time))
  125. return kernel
  126. def _compute_single_kernel_series(self, g1, g2):
  127. pass
  128. def is_graph(self, graph):
  129. if isinstance(graph, nx.Graph):
  130. return True
  131. if isinstance(graph, nx.DiGraph):
  132. return True
  133. if isinstance(graph, nx.MultiGraph):
  134. return True
  135. if isinstance(graph, nx.MultiDiGraph):
  136. return True
  137. return False
  138. @property
  139. def graphs(self):
  140. return self._graphs
  141. @property
  142. def parallel(self):
  143. return self._parallel
  144. @property
  145. def n_jobs(self):
  146. return self._n_jobs
  147. @property
  148. def verbose(self):
  149. return self._verbose
  150. @property
  151. def normalize(self):
  152. return self._normalize
  153. @property
  154. def run_time(self):
  155. return self._run_time
  156. @property
  157. def gram_matrix(self):
  158. return self._gram_matrix
  159. @gram_matrix.setter
  160. def gram_matrix(self, value):
  161. self._gram_matrix = value
  162. @property
  163. def gram_matrix_unnorm(self):
  164. return self._gram_matrix_unnorm
  165. @gram_matrix_unnorm.setter
  166. def gram_matrix_unnorm(self, value):
  167. self._gram_matrix_unnorm = value

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.