You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_graph_kernels.py 18 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. """Tests of graph kernels.
  2. """
  3. import pytest
  4. import multiprocessing
  5. def chooseDataset(ds_name):
  6. """Choose dataset according to name.
  7. """
  8. from gklearn.utils import Dataset
  9. dataset = Dataset()
  10. # no node labels (and no edge labels).
  11. if ds_name == 'Alkane':
  12. dataset.load_predefined_dataset(ds_name)
  13. dataset.trim_dataset(edge_required=False)
  14. irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
  15. dataset.remove_labels(**irrelevant_labels)
  16. # node symbolic labels.
  17. elif ds_name == 'Acyclic':
  18. dataset.load_predefined_dataset(ds_name)
  19. dataset.trim_dataset(edge_required=False)
  20. irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
  21. dataset.remove_labels(**irrelevant_labels)
  22. # node non-symbolic labels.
  23. elif ds_name == 'Letter-med':
  24. dataset.load_predefined_dataset(ds_name)
  25. dataset.trim_dataset(edge_required=False)
  26. # node symbolic and non-symbolic labels (and edge symbolic labels).
  27. elif ds_name == 'AIDS':
  28. dataset.load_predefined_dataset(ds_name)
  29. dataset.trim_dataset(edge_required=False)
  30. # edge non-symbolic labels (no node labels).
  31. elif ds_name == 'Fingerprint_edge':
  32. dataset.load_predefined_dataset('Fingerprint')
  33. dataset.trim_dataset(edge_required=True)
  34. irrelevant_labels = {'edge_attrs': ['orient', 'angle']}
  35. dataset.remove_labels(**irrelevant_labels)
  36. # edge non-symbolic labels (and node non-symbolic labels).
  37. elif ds_name == 'Fingerprint':
  38. dataset.load_predefined_dataset(ds_name)
  39. dataset.trim_dataset(edge_required=True)
  40. # edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels).
  41. elif ds_name == 'Cuneiform':
  42. dataset.load_predefined_dataset(ds_name)
  43. dataset.trim_dataset(edge_required=True)
  44. dataset.cut_graphs(range(0, 3))
  45. return dataset
  46. # @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  47. # @pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
  48. # @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  49. # def test_CommonWalk(ds_name, parallel, weight, compute_method):
  50. # """Test common walk kernel.
  51. # """
  52. # from gklearn.kernels import CommonWalk
  53. # import networkx as nx
  54. #
  55. # dataset = chooseDataset(ds_name)
  56. # dataset.load_graphs([g for g in dataset.graphs if nx.number_of_nodes(g) > 1])
  57. #
  58. # try:
  59. # graph_kernel = CommonWalk(node_labels=dataset.node_labels,
  60. # edge_labels=dataset.edge_labels,
  61. # ds_infos=dataset.get_dataset_infos(keys=['directed']),
  62. # weight=weight,
  63. # compute_method=compute_method)
  64. # gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  65. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  66. # kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  67. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  68. # kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  69. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  70. # except Exception as exception:
  71. # assert False, exception
  72. #
  73. #
  74. # @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  75. # @pytest.mark.parametrize('remove_totters', [False]) #[True, False])
  76. # @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  77. # def test_Marginalized(ds_name, parallel, remove_totters):
  78. # """Test marginalized kernel.
  79. # """
  80. # from gklearn.kernels import Marginalized
  81. #
  82. # dataset = chooseDataset(ds_name)
  83. #
  84. # try:
  85. # graph_kernel = Marginalized(node_labels=dataset.node_labels,
  86. # edge_labels=dataset.edge_labels,
  87. # ds_infos=dataset.get_dataset_infos(keys=['directed']),
  88. # p_quit=0.5,
  89. # n_iteration=2,
  90. # remove_totters=remove_totters)
  91. # gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  92. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  93. # kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  94. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  95. # kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  96. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  97. # except Exception as exception:
  98. # assert False, exception
  99. #
  100. #
  101. @pytest.mark.parametrize('ds_name', ['Acyclic'])
  102. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  103. def test_SylvesterEquation(ds_name, parallel):
  104. """Test sylvester equation kernel.
  105. """
  106. from gklearn.kernels import SylvesterEquation
  107. dataset = chooseDataset(ds_name)
  108. try:
  109. graph_kernel = SylvesterEquation(
  110. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  111. weight=1e-3,
  112. p=None,
  113. q=None,
  114. edge_weight=None)
  115. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  116. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  117. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  118. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  119. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  120. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  121. except Exception as exception:
  122. assert False, exception
  123. @pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS'])
  124. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  125. def test_ConjugateGradient(ds_name, parallel):
  126. """Test conjugate gradient kernel.
  127. """
  128. from gklearn.kernels import ConjugateGradient
  129. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  130. import functools
  131. dataset = chooseDataset(ds_name)
  132. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  133. sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  134. try:
  135. graph_kernel = ConjugateGradient(
  136. node_labels=dataset.node_labels,
  137. node_attrs=dataset.node_attrs,
  138. edge_labels=dataset.edge_labels,
  139. edge_attrs=dataset.edge_attrs,
  140. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  141. weight=1e-3,
  142. p=None,
  143. q=None,
  144. edge_weight=None,
  145. node_kernels=sub_kernels,
  146. edge_kernels=sub_kernels)
  147. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  148. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  149. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  150. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  151. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  152. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  153. except Exception as exception:
  154. assert False, exception
  155. @pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS'])
  156. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  157. def test_FixedPoint(ds_name, parallel):
  158. """Test fixed point kernel.
  159. """
  160. from gklearn.kernels import FixedPoint
  161. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  162. import functools
  163. dataset = chooseDataset(ds_name)
  164. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  165. sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  166. try:
  167. graph_kernel = FixedPoint(
  168. node_labels=dataset.node_labels,
  169. node_attrs=dataset.node_attrs,
  170. edge_labels=dataset.edge_labels,
  171. edge_attrs=dataset.edge_attrs,
  172. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  173. weight=1e-3,
  174. p=None,
  175. q=None,
  176. edge_weight=None,
  177. node_kernels=sub_kernels,
  178. edge_kernels=sub_kernels)
  179. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  180. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  181. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  182. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  183. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  184. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  185. except Exception as exception:
  186. assert False, exception
  187. @pytest.mark.parametrize('ds_name', ['Acyclic'])
  188. @pytest.mark.parametrize('sub_kernel', ['exp', 'geo'])
  189. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  190. def test_SpectralDecomposition(ds_name, sub_kernel, parallel):
  191. """Test spectral decomposition kernel.
  192. """
  193. from gklearn.kernels import SpectralDecomposition
  194. dataset = chooseDataset(ds_name)
  195. try:
  196. graph_kernel = SpectralDecomposition(
  197. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  198. weight=1e-3,
  199. p=None,
  200. q=None,
  201. edge_weight=None,
  202. sub_kernel=sub_kernel)
  203. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  204. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  205. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  206. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  207. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  208. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  209. except Exception as exception:
  210. assert False, exception
  211. #
  212. #
  213. # # @pytest.mark.parametrize(
  214. # # 'compute_method,ds_name,sub_kernel',
  215. # # [
  216. # # ('sylvester', 'Alkane', None),
  217. # # ('conjugate', 'Alkane', None),
  218. # # ('conjugate', 'AIDS', None),
  219. # # ('fp', 'Alkane', None),
  220. # # ('fp', 'AIDS', None),
  221. # # ('spectral', 'Alkane', 'exp'),
  222. # # ('spectral', 'Alkane', 'geo'),
  223. # # ]
  224. # # )
  225. # # @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  226. # # def test_RandomWalk(ds_name, compute_method, sub_kernel, parallel):
  227. # # """Test random walk kernel.
  228. # # """
  229. # # from gklearn.kernels import RandomWalk
  230. # # from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  231. # # import functools
  232. # #
  233. # # dataset = chooseDataset(ds_name)
  234. # # mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  235. # # sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  236. # # # try:
  237. # # graph_kernel = RandomWalk(node_labels=dataset.node_labels,
  238. # # node_attrs=dataset.node_attrs,
  239. # # edge_labels=dataset.edge_labels,
  240. # # edge_attrs=dataset.edge_attrs,
  241. # # ds_infos=dataset.get_dataset_infos(keys=['directed']),
  242. # # compute_method=compute_method,
  243. # # weight=1e-3,
  244. # # p=None,
  245. # # q=None,
  246. # # edge_weight=None,
  247. # # node_kernels=sub_kernels,
  248. # # edge_kernels=sub_kernels,
  249. # # sub_kernel=sub_kernel)
  250. # # gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  251. # # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  252. # # kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  253. # # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  254. # # kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  255. # # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  256. # # except Exception as exception:
  257. # # assert False, exception
  258. #
  259. # @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
  260. # @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  261. # def test_ShortestPath(ds_name, parallel):
  262. # """Test shortest path kernel.
  263. # """
  264. # from gklearn.kernels import ShortestPath
  265. # from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  266. # import functools
  267. #
  268. # dataset = chooseDataset(ds_name)
  269. #
  270. # mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  271. # sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  272. # try:
  273. # graph_kernel = ShortestPath(node_labels=dataset.node_labels,
  274. # node_attrs=dataset.node_attrs,
  275. # ds_infos=dataset.get_dataset_infos(keys=['directed']),
  276. # node_kernels=sub_kernels)
  277. # gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  278. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  279. # kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  280. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  281. # kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  282. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  283. # except Exception as exception:
  284. # assert False, exception
  285. # #@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
  286. # @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform'])
  287. # @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  288. # def test_StructuralSP(ds_name, parallel):
  289. # """Test structural shortest path kernel.
  290. # """
  291. # from gklearn.kernels import StructuralSP
  292. # from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  293. # import functools
  294. #
  295. # dataset = chooseDataset(ds_name)
  296. #
  297. # mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  298. # sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  299. # try:
  300. # graph_kernel = StructuralSP(node_labels=dataset.node_labels,
  301. # edge_labels=dataset.edge_labels,
  302. # node_attrs=dataset.node_attrs,
  303. # edge_attrs=dataset.edge_attrs,
  304. # ds_infos=dataset.get_dataset_infos(keys=['directed']),
  305. # node_kernels=sub_kernels,
  306. # edge_kernels=sub_kernels)
  307. # gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  308. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  309. # kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  310. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  311. # kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  312. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  313. # except Exception as exception:
  314. # assert False, exception
  315. # @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  316. # @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  317. # #@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto', None])
  318. # @pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto'])
  319. # @pytest.mark.parametrize('compute_method', ['trie', 'naive'])
  320. # def test_PathUpToH(ds_name, parallel, k_func, compute_method):
  321. # """Test path kernel up to length $h$.
  322. # """
  323. # from gklearn.kernels import PathUpToH
  324. #
  325. # dataset = chooseDataset(ds_name)
  326. #
  327. # try:
  328. # graph_kernel = PathUpToH(node_labels=dataset.node_labels,
  329. # edge_labels=dataset.edge_labels,
  330. # ds_infos=dataset.get_dataset_infos(keys=['directed']),
  331. # depth=2, k_func=k_func, compute_method=compute_method)
  332. # gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  333. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  334. # kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  335. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  336. # kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  337. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  338. # except Exception as exception:
  339. # assert False, exception
  340. #
  341. #
  342. # @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  343. # @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  344. # def test_Treelet(ds_name, parallel):
  345. # """Test treelet kernel.
  346. # """
  347. # from gklearn.kernels import Treelet
  348. # from gklearn.utils.kernels import polynomialkernel
  349. # import functools
  350. #
  351. # dataset = chooseDataset(ds_name)
  352. # pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
  353. # try:
  354. # graph_kernel = Treelet(node_labels=dataset.node_labels,
  355. # edge_labels=dataset.edge_labels,
  356. # ds_infos=dataset.get_dataset_infos(keys=['directed']),
  357. # sub_kernel=pkernel)
  358. # gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  359. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  360. # kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  361. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  362. # kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  363. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  364. # except Exception as exception:
  365. # assert False, exception
  366. #
  367. #
  368. # @pytest.mark.parametrize('ds_name', ['Acyclic'])
  369. # #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge'])
  370. # # @pytest.mark.parametrize('base_kernel', ['subtree'])
  371. # @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  372. # def test_WLSubtree(ds_name, parallel):
  373. # """Test Weisfeiler-Lehman subtree kernel.
  374. # """
  375. # from gklearn.kernels import WLSubtree
  376. #
  377. # dataset = chooseDataset(ds_name)
  378. # try:
  379. # graph_kernel = WLSubtree(node_labels=dataset.node_labels,
  380. # edge_labels=dataset.edge_labels,
  381. # ds_infos=dataset.get_dataset_infos(keys=['directed']),
  382. # height=2)
  383. # gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  384. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  385. # kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  386. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  387. # kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  388. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  389. # except Exception as exception:
  390. # assert False, exception
  391. if __name__ == "__main__":
  392. # test_spkernel('Alkane', 'imap_unordered')
  393. test_StructuralSP('Fingerprint_edge', 'imap_unordered')
  394. # test_RandomWalk('Acyclic', 'sylvester', None, 'imap_unordered')
  395. # test_RandomWalk('Acyclic', 'conjugate', None, 'imap_unordered')
  396. # test_RandomWalk('Acyclic', 'fp', None, None)
  397. # test_RandomWalk('Acyclic', 'spectral', 'exp', 'imap_unordered')

A Python package for graph kernels, graph edit distances and graph pre-image problem.