You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 10 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Sep 22 11:33:28 2020
  5. @author: ljia
  6. """
  7. import multiprocessing
  8. import numpy as np
  9. from gklearn.utils import model_selection_for_precomputed_kernel
  # Names of every graph kernel handled by this module, in the order used
  # by the experiments.
  # An alternative ordering that was previously used:
  #   ['CommonWalk', 'Marginalized', 'SylvesterEquation', 'ConjugateGradient',
  #    'FixedPoint', 'SpectralDecomposition', 'ShortestPath', 'StructuralSP',
  #    'PathUpToH', 'Treelet', 'WLSubtree']
  Graph_Kernel_List = [
      'PathUpToH',
      'WLSubtree',
      'SylvesterEquation',
      'Marginalized',
      'ShortestPath',
      'Treelet',
      'ConjugateGradient',
      'FixedPoint',
      'SpectralDecomposition',
      'StructuralSP',
      'CommonWalk',
  ]

  # Subsets of the kernels above.
  # NOTE(review): the suffixes presumably stand for vertex/edge (V/E) and
  # symbolic/continuous (Sym/Con) label support -- confirm with the authors.
  Graph_Kernel_List_VSym = [
      'PathUpToH',
      'WLSubtree',
      'Marginalized',
      'ShortestPath',
      'Treelet',
      'ConjugateGradient',
      'FixedPoint',
      'StructuralSP',
      'CommonWalk',
  ]

  Graph_Kernel_List_ESym = [
      'PathUpToH',
      'Marginalized',
      'Treelet',
      'ConjugateGradient',
      'FixedPoint',
      'StructuralSP',
      'CommonWalk',
  ]

  Graph_Kernel_List_VCon = [
      'ShortestPath',
      'ConjugateGradient',
      'FixedPoint',
      'StructuralSP',
  ]

  Graph_Kernel_List_ECon = [
      'ConjugateGradient',
      'FixedPoint',
      'StructuralSP',
  ]

  # Names of the datasets used in the experiments.
  Dataset_List = [
      'Alkane',
      'Acyclic',
      'MAO',
      'PAH',
      'MUTAG',
      'Letter-med',
      'ENZYMES',
      'AIDS',
      'NCI1',
      'NCI109',
      'DD',
  ]
  def compute_graph_kernel(graphs, kernel_name, n_jobs=multiprocessing.cpu_count(), chunksize=None):
      """Run one graph kernel on ``graphs`` with a fixed set of parameters.

      The kernel implementation is selected by ``kernel_name`` and imported
      lazily, so only the module of the requested kernel is loaded.

      Parameters
      ----------
      graphs
          The graphs handed to the kernel function as its first argument.
      kernel_name : str
          One of 'CommonWalk', 'Marginalized', 'SylvesterEquation',
          'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition',
          'ShortestPath', 'StructuralSP', 'PathUpToH', 'Treelet', 'WLSubtree'.
      n_jobs : int
          Forwarded to the kernel function as ``n_jobs``. The default is the
          CPU count evaluated once, when this function is defined.
      chunksize
          Forwarded to the kernel function as ``chunksize``.

      Returns
      -------
      tuple
          The first two elements of the kernel function's result
          (presumably the Gram matrix and the run time -- confirm against
          the kernel implementations).

      Raises
      ------
      ValueError
          If ``kernel_name`` is not one of the supported names.
      """
      if kernel_name == 'CommonWalk':
          from gklearn.kernels.commonWalkKernel import commonwalkkernel
          estimator = commonwalkkernel
          params = {'compute_method': 'geo', 'weight': 0.1}

      elif kernel_name == 'Marginalized':
          from gklearn.kernels.marginalizedKernel import marginalizedkernel
          estimator = marginalizedkernel
          params = {'p_quit': 0.5, 'n_iteration': 5, 'remove_totters': False}

      elif kernel_name == 'SylvesterEquation':
          from gklearn.kernels.randomWalkKernel import randomwalkkernel
          estimator = randomwalkkernel
          params = {'compute_method': 'sylvester', 'weight': 0.1}

      elif kernel_name == 'ConjugateGradient':
          from gklearn.kernels.randomWalkKernel import randomwalkkernel
          estimator = randomwalkkernel
          sub_kernel = _default_mixed_sub_kernels()
          params = {'compute_method': 'conjugate', 'weight': 0.1,
                    'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}

      elif kernel_name == 'FixedPoint':
          from gklearn.kernels.randomWalkKernel import randomwalkkernel
          estimator = randomwalkkernel
          sub_kernel = _default_mixed_sub_kernels()
          params = {'compute_method': 'fp', 'weight': 1e-4,
                    'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}

      elif kernel_name == 'SpectralDecomposition':
          from gklearn.kernels.randomWalkKernel import randomwalkkernel
          estimator = randomwalkkernel
          params = {'compute_method': 'spectral', 'sub_kernel': 'geo', 'weight': 0.1}

      elif kernel_name == 'ShortestPath':
          from gklearn.kernels.spKernel import spkernel
          estimator = spkernel
          params = {'node_kernels': _default_mixed_sub_kernels()}

      elif kernel_name == 'StructuralSP':
          from gklearn.kernels.structuralspKernel import structuralspkernel
          estimator = structuralspkernel
          sub_kernel = _default_mixed_sub_kernels()
          params = {'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}

      elif kernel_name == 'PathUpToH':
          from gklearn.kernels.untilHPathKernel import untilhpathkernel
          estimator = untilhpathkernel
          params = {'depth': 5, 'k_func': 'MinMax', 'compute_method': 'trie'}

      elif kernel_name == 'Treelet':
          from gklearn.kernels.treeletKernel import treeletkernel
          estimator = treeletkernel
          from gklearn.utils.kernels import polynomialkernel
          import functools
          params = {'sub_kernel': functools.partial(polynomialkernel, d=4, c=1e+8)}

      elif kernel_name == 'WLSubtree':
          from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
          estimator = weisfeilerlehmankernel
          params = {'base_kernel': 'subtree', 'height': 5}

      else:
          # Without this branch an unknown name only surfaced later as an
          # UnboundLocalError on `params`, which hides the real mistake.
          raise ValueError('Unknown kernel_name: {!r}'.format(kernel_name))

      # Settings shared by every kernel.
      params['n_jobs'] = n_jobs
      params['chunksize'] = chunksize
      params['verbose'] = True

      results = estimator(graphs, **params)
      return results[0], results[1]


  def _default_mixed_sub_kernels():
      """Return the 'symb' / 'nsymb' / 'mix' sub-kernel mapping used by compute_graph_kernel.

      'symb' maps to ``deltakernel``, 'nsymb' to ``gaussiankernel`` and 'mix'
      to their ``kernelproduct``. The imports are local so that gklearn is
      only loaded when a kernel that needs these sub-kernels is requested.
      """
      import functools
      from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
      mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
      return {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  def cross_validate(graphs, targets, kernel_name, output_dir='outputs/', ds_name='synthesized', n_jobs=multiprocessing.cpu_count()):
      """Run model selection for one graph kernel over a parameter grid.

      Builds the kernel-specific hyper-parameter grid for ``kernel_name`` and
      passes it, together with a grid over ``C``, to
      ``model_selection_for_precomputed_kernel`` as a 'classification' task
      with ``NUM_TRIALS=28``.

      Parameters
      ----------
      graphs
          The graphs passed through to the model-selection routine.
      targets
          Passed to the model-selection routine as ``datafile_y``.
      kernel_name : str
          One of 'CommonWalk', 'Marginalized', 'SylvesterEquation',
          'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition',
          'ShortestPath', 'StructuralSP', 'PathUpToH', 'Treelet', 'WLSubtree'.
      output_dir : str
          Forwarded as ``output_dir``.
      ds_name : str
          Forwarded as ``ds_name``.
      n_jobs : int
          Forwarded as ``n_jobs``. The default is the CPU count evaluated
          once, when this function is defined.

      Returns
      -------
      tuple
          The first two elements of the model-selection result (their
          meaning is defined by ``model_selection_for_precomputed_kernel``).

      Raises
      ------
      ValueError
          If ``kernel_name`` is not one of the supported names.
      """
      # Stays None unless a kernel branch supplies its own grid over C.
      param_grid = None

      if kernel_name == 'CommonWalk':
          from gklearn.kernels.commonWalkKernel import commonwalkkernel
          estimator = commonwalkkernel
          # Unlike the other branches this grid is a list holding one dict.
          param_grid_precomputed = [{'compute_method': ['geo'],
                                     'weight': np.linspace(0.01, 0.15, 15)}]

      elif kernel_name == 'Marginalized':
          from gklearn.kernels.marginalizedKernel import marginalizedkernel
          estimator = marginalizedkernel
          param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.9, 9),
                                    'n_iteration': np.linspace(1, 19, 7),
                                    'remove_totters': [False]}

      elif kernel_name == 'SylvesterEquation':
          from gklearn.kernels.randomWalkKernel import randomwalkkernel
          estimator = randomwalkkernel
          param_grid_precomputed = {'compute_method': ['sylvester'],
                                    'weight': np.logspace(-1, -10, num=10, base=10)}

      elif kernel_name == 'ConjugateGradient':
          from gklearn.kernels.randomWalkKernel import randomwalkkernel
          estimator = randomwalkkernel
          sub_kernel = _grid_mixed_sub_kernels()
          param_grid_precomputed = {'compute_method': ['conjugate'],
                                    'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel],
                                    'weight': np.logspace(-1, -10, num=10, base=10)}

      elif kernel_name == 'FixedPoint':
          from gklearn.kernels.randomWalkKernel import randomwalkkernel
          estimator = randomwalkkernel
          sub_kernel = _grid_mixed_sub_kernels()
          param_grid_precomputed = {'compute_method': ['fp'],
                                    'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel],
                                    'weight': np.logspace(-3, -10, num=8, base=10)}

      elif kernel_name == 'SpectralDecomposition':
          from gklearn.kernels.randomWalkKernel import randomwalkkernel
          estimator = randomwalkkernel
          param_grid_precomputed = {'compute_method': ['spectral'],
                                    'weight': np.logspace(-1, -10, num=10, base=10),
                                    'sub_kernel': ['geo', 'exp']}

      elif kernel_name == 'ShortestPath':
          from gklearn.kernels.spKernel import spkernel
          estimator = spkernel
          param_grid_precomputed = {'node_kernels': [_grid_mixed_sub_kernels()]}

      elif kernel_name == 'StructuralSP':
          from gklearn.kernels.structuralspKernel import structuralspkernel
          estimator = structuralspkernel
          sub_kernel = _grid_mixed_sub_kernels()
          param_grid_precomputed = {'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel],
                                    'compute_method': ['naive']}

      elif kernel_name == 'PathUpToH':
          from gklearn.kernels.untilHPathKernel import untilhpathkernel
          estimator = untilhpathkernel
          param_grid_precomputed = {'depth': np.linspace(1, 10, 10),
                                    'k_func': ['MinMax', 'tanimoto'],
                                    'compute_method': ['trie']}

      elif kernel_name == 'Treelet':
          from gklearn.kernels.treeletKernel import treeletkernel
          estimator = treeletkernel
          # gaussiankernel is used for `gkernels` below; previously only
          # polynomialkernel was imported, so this branch raised NameError.
          from gklearn.utils.kernels import gaussiankernel, polynomialkernel
          import functools
          gkernels = [functools.partial(gaussiankernel, gamma=1 / ga)
                      for ga in np.logspace(0, 10, num=11, base=10)]
          pkernels = [functools.partial(polynomialkernel, d=d, c=c)
                      for d in range(1, 11)
                      for c in np.logspace(0, 10, num=11, base=10)]
          param_grid_precomputed = {'sub_kernel': pkernels + gkernels}

      elif kernel_name == 'WLSubtree':
          from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
          estimator = weisfeilerlehmankernel
          param_grid_precomputed = {'base_kernel': ['subtree'],
                                    'height': np.linspace(0, 10, 11)}
          # WLSubtree is the only kernel with its own C grid (capped at 1e4).
          # NOTE(review): indentation was lost in the reviewed copy; this
          # assignment is kept inside the WLSubtree branch because otherwise
          # the `param_grid is None` fallback below could never run.
          param_grid = {'C': np.logspace(-10, 4, num=29, base=10)}

      else:
          # Without this branch an unknown name only surfaced later as an
          # unbound-local error on `estimator`, which hides the real mistake.
          raise ValueError('Unknown kernel_name: {!r}'.format(kernel_name))

      if param_grid is None:
          param_grid = {'C': np.logspace(-10, 10, num=41, base=10)}

      results = model_selection_for_precomputed_kernel(
          graphs,
          estimator,
          param_grid_precomputed,
          param_grid,
          'classification',
          NUM_TRIALS=28,
          datafile_y=targets,
          extra_params=None,
          ds_name=ds_name,
          output_dir=output_dir,
          n_jobs=n_jobs,
          read_gm_from_file=False,
          verbose=True)

      return results[0], results[1]


  def _grid_mixed_sub_kernels():
      """Return the 'symb' / 'nsymb' / 'mix' sub-kernel mapping used by cross_validate.

      'symb' maps to ``deltakernel``, 'nsymb' to ``gaussiankernel`` and 'mix'
      to their ``kernelproduct``. The imports are local so that gklearn is
      only loaded when a kernel that needs these sub-kernels is requested.
      """
      import functools
      from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
      mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
      return {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.