  1. """Tests of graph kernels.
  2. """
  3. import pytest
  4. import multiprocessing


def chooseDataset(ds_name):
    """Choose dataset according to name.
    """
    from gklearn.utils import Dataset

    dataset = Dataset()

    # no node labels (and no edge labels).
    if ds_name == 'Alkane':
        dataset.load_predefined_dataset(ds_name)
        dataset.trim_dataset(edge_required=False)
        irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
        dataset.remove_labels(**irrelevant_labels)
    # node symbolic labels.
    elif ds_name == 'Acyclic':
        dataset.load_predefined_dataset(ds_name)
        dataset.trim_dataset(edge_required=False)
        irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
        dataset.remove_labels(**irrelevant_labels)
    # node non-symbolic labels.
    elif ds_name == 'Letter-med':
        dataset.load_predefined_dataset(ds_name)
        dataset.trim_dataset(edge_required=False)
    # node symbolic and non-symbolic labels (and edge symbolic labels).
    elif ds_name == 'AIDS':
        dataset.load_predefined_dataset(ds_name)
        dataset.trim_dataset(edge_required=False)
    # edge non-symbolic labels (no node labels).
    elif ds_name == 'Fingerprint_edge':
        dataset.load_predefined_dataset('Fingerprint')
        dataset.trim_dataset(edge_required=True)
        irrelevant_labels = {'edge_attrs': ['orient', 'angle']}
        dataset.remove_labels(**irrelevant_labels)
    # edge non-symbolic labels (and node non-symbolic labels).
    elif ds_name == 'Fingerprint':
        dataset.load_predefined_dataset(ds_name)
        dataset.trim_dataset(edge_required=True)
    # edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels).
    elif ds_name == 'Cuneiform':
        dataset.load_predefined_dataset(ds_name)
        dataset.trim_dataset(edge_required=True)

    dataset.cut_graphs(range(0, 3))

    return dataset
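
# A minimal usage sketch of the helper above (assuming the predefined datasets
# bundled with gklearn can be loaded in the current environment):
#
#     dataset = chooseDataset('Acyclic')
#     print(len(dataset.graphs), dataset.node_labels, dataset.node_attrs)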


# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
# @pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_commonwalkkernel(ds_name, weight, compute_method):
#     """Test common walk kernel.
#     """
#     from gklearn.kernels.commonWalkKernel import commonwalkkernel
#     Gn, y = chooseDataset(ds_name)
#     try:
#         Kmatrix, run_time, idx = commonwalkkernel(Gn,
#                                                   node_label='atom',
#                                                   edge_label='bond_type',
#                                                   weight=weight,
#                                                   compute_method=compute_method,
#                                                   # parallel=parallel,
#                                                   n_jobs=multiprocessing.cpu_count(),
#                                                   verbose=True)
#     except Exception as exception:
#         assert False, exception


# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
# @pytest.mark.parametrize('remove_totters', [True, False])
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_marginalizedkernel(ds_name, remove_totters):
#     """Test marginalized kernel.
#     """
#     from gklearn.kernels.marginalizedKernel import marginalizedkernel
#     Gn, y = chooseDataset(ds_name)
#     try:
#         Kmatrix, run_time = marginalizedkernel(Gn,
#                                                node_label='atom',
#                                                edge_label='bond_type',
#                                                p_quit=0.5,
#                                                n_iteration=2,
#                                                remove_totters=remove_totters,
#                                                # parallel=parallel,
#                                                n_jobs=multiprocessing.cpu_count(),
#                                                verbose=True)
#     except Exception as exception:
#         assert False, exception


# @pytest.mark.parametrize(
#     'compute_method,ds_name,sub_kernel',
#     [
#         # ('sylvester', 'Alkane', None),
#         # ('conjugate', 'Alkane', None),
#         # ('conjugate', 'AIDS', None),
#         # ('fp', 'Alkane', None),
#         # ('fp', 'AIDS', None),
#         ('spectral', 'Alkane', 'exp'),
#         ('spectral', 'Alkane', 'geo'),
#     ]
# )
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_randomwalkkernel(ds_name, compute_method, sub_kernel):
#     """Test random walk kernel.
#     """
#     from gklearn.kernels.randomWalkKernel import randomwalkkernel
#     from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
#     import functools
#     Gn, y = chooseDataset(ds_name)
#     mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
#     sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
#     try:
#         Kmatrix, run_time, idx = randomwalkkernel(Gn,
#                                                   compute_method=compute_method,
#                                                   weight=1e-3,
#                                                   p=None,
#                                                   q=None,
#                                                   edge_weight=None,
#                                                   node_kernels=sub_kernels,
#                                                   edge_kernels=sub_kernels,
#                                                   node_label='atom',
#                                                   edge_label='bond_type',
#                                                   sub_kernel=sub_kernel,
#                                                   # parallel=parallel,
#                                                   n_jobs=multiprocessing.cpu_count(),
#                                                   verbose=True)
#     except Exception as exception:
#         assert False, exception


@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_ShortestPath(ds_name, parallel):
    """Test shortest path kernel.
    """
    from gklearn.kernels import ShortestPath
    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
    import functools

    dataset = chooseDataset(ds_name)

    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
    try:
        graph_kernel = ShortestPath(node_labels=dataset.node_labels,
                                    node_attrs=dataset.node_attrs,
                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                    node_kernels=sub_kernels)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


# @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_StructuralSP(ds_name, parallel):
    """Test structural shortest path kernel.
    """
    from gklearn.kernels import StructuralSP
    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
    import functools

    dataset = chooseDataset(ds_name)

    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
    try:
        graph_kernel = StructuralSP(node_labels=dataset.node_labels,
                                    edge_labels=dataset.edge_labels,
                                    node_attrs=dataset.node_attrs,
                                    edge_attrs=dataset.edge_attrs,
                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                    node_kernels=sub_kernels,
                                    edge_kernels=sub_kernels)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# @pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto', None])
@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto'])
@pytest.mark.parametrize('compute_method', ['trie', 'naive'])
def test_PathUpToH(ds_name, parallel, k_func, compute_method):
    """Test path kernel up to length $h$.
    """
    from gklearn.kernels import PathUpToH

    dataset = chooseDataset(ds_name)

    try:
        graph_kernel = PathUpToH(node_labels=dataset.node_labels,
                                 edge_labels=dataset.edge_labels,
                                 ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                 depth=2, k_func=k_func, compute_method=compute_method)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
# @pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_treeletkernel(ds_name, parallel):
#     """Test treelet kernel.
#     """
#     from gklearn.kernels.treeletKernel import treeletkernel
#     from gklearn.utils.kernels import polynomialkernel
#     import functools
#     Gn, y = chooseDataset(ds_name)
#     pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
#     try:
#         Kmatrix, run_time = treeletkernel(Gn,
#                                           sub_kernel=pkernel,
#                                           node_label='atom',
#                                           edge_label='bond_type',
#                                           parallel=parallel,
#                                           n_jobs=multiprocessing.cpu_count(),
#                                           verbose=True)
#     except Exception as exception:
#         assert False, exception


# @pytest.mark.parametrize('ds_name', ['Acyclic'])
# #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge'])
# @pytest.mark.parametrize('base_kernel', ['subtree'])
# @pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel):
#     """Test Weisfeiler-Lehman kernel.
#     """
#     from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
#     Gn, y = chooseDataset(ds_name)
#     try:
#         Kmatrix, run_time = weisfeilerlehmankernel(Gn,
#                                                    node_label='atom',
#                                                    edge_label='bond_type',
#                                                    height=2,
#                                                    base_kernel=base_kernel,
#                                                    parallel=parallel,
#                                                    n_jobs=multiprocessing.cpu_count(),
#                                                    verbose=True)
#     except Exception as exception:
#         assert False, exception
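

# A minimal way to run the whole suite (assuming pytest is installed and
# gklearn is importable from the current environment):
#
#     pytest -v test_graph_kernels.py
#
# The block below instead calls a single test directly, which is handy for
# quick debugging without pytest collection and parametrization.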
if __name__ == "__main__":
    # test_spkernel('Alkane', 'imap_unordered')
    test_StructuralSP('Fingerprint_edge', 'imap_unordered')
