You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_graph_kernels.py 18 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. """Tests of graph kernels.
  2. """
  3. import pytest
  4. import multiprocessing
  5. def chooseDataset(ds_name):
  6. """Choose dataset according to name.
  7. """
  8. from gklearn.utils import Dataset
  9. dataset = Dataset()
  10. # no node labels (and no edge labels).
  11. if ds_name == 'Alkane':
  12. dataset.load_predefined_dataset(ds_name)
  13. dataset.trim_dataset(edge_required=False)
  14. irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
  15. dataset.remove_labels(**irrelevant_labels)
  16. # node symbolic labels.
  17. elif ds_name == 'Acyclic':
  18. dataset.load_predefined_dataset(ds_name)
  19. dataset.trim_dataset(edge_required=False)
  20. irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
  21. dataset.remove_labels(**irrelevant_labels)
  22. # node non-symbolic labels.
  23. elif ds_name == 'Letter-med':
  24. dataset.load_predefined_dataset(ds_name)
  25. dataset.trim_dataset(edge_required=False)
  26. # node symbolic and non-symbolic labels (and edge symbolic labels).
  27. elif ds_name == 'AIDS':
  28. dataset.load_predefined_dataset(ds_name)
  29. dataset.trim_dataset(edge_required=False)
  30. # edge non-symbolic labels (no node labels).
  31. elif ds_name == 'Fingerprint_edge':
  32. dataset.load_predefined_dataset('Fingerprint')
  33. dataset.trim_dataset(edge_required=True)
  34. irrelevant_labels = {'edge_attrs': ['orient', 'angle']}
  35. dataset.remove_labels(**irrelevant_labels)
  36. # edge non-symbolic labels (and node non-symbolic labels).
  37. elif ds_name == 'Fingerprint':
  38. dataset.load_predefined_dataset(ds_name)
  39. dataset.trim_dataset(edge_required=True)
  40. # edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels).
  41. elif ds_name == 'Cuneiform':
  42. dataset.load_predefined_dataset(ds_name)
  43. dataset.trim_dataset(edge_required=True)
  44. dataset.cut_graphs(range(0, 3))
  45. return dataset
  46. @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  47. @pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
  48. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  49. def test_CommonWalk(ds_name, parallel, weight, compute_method):
  50. """Test common walk kernel.
  51. """
  52. from gklearn.kernels import CommonWalk
  53. import networkx as nx
  54. dataset = chooseDataset(ds_name)
  55. dataset.load_graphs([g for g in dataset.graphs if nx.number_of_nodes(g) > 1])
  56. try:
  57. graph_kernel = CommonWalk(node_labels=dataset.node_labels,
  58. edge_labels=dataset.edge_labels,
  59. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  60. weight=weight,
  61. compute_method=compute_method)
  62. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  63. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  64. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  65. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  66. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  67. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  68. except Exception as exception:
  69. assert False, exception
  70. @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  71. @pytest.mark.parametrize('remove_totters', [False]) #[True, False])
  72. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  73. def test_Marginalized(ds_name, parallel, remove_totters):
  74. """Test marginalized kernel.
  75. """
  76. from gklearn.kernels import Marginalized
  77. dataset = chooseDataset(ds_name)
  78. try:
  79. graph_kernel = Marginalized(node_labels=dataset.node_labels,
  80. edge_labels=dataset.edge_labels,
  81. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  82. p_quit=0.5,
  83. n_iteration=2,
  84. remove_totters=remove_totters)
  85. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  86. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  87. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  88. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  89. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  90. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  91. except Exception as exception:
  92. assert False, exception
  93. @pytest.mark.parametrize('ds_name', ['Acyclic'])
  94. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  95. def test_SylvesterEquation(ds_name, parallel):
  96. """Test sylvester equation kernel.
  97. """
  98. from gklearn.kernels import SylvesterEquation
  99. dataset = chooseDataset(ds_name)
  100. try:
  101. graph_kernel = SylvesterEquation(
  102. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  103. weight=1e-3,
  104. p=None,
  105. q=None,
  106. edge_weight=None)
  107. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  108. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  109. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  110. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  111. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  112. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  113. except Exception as exception:
  114. assert False, exception
  115. @pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS'])
  116. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  117. def test_ConjugateGradient(ds_name, parallel):
  118. """Test conjugate gradient kernel.
  119. """
  120. from gklearn.kernels import ConjugateGradient
  121. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  122. import functools
  123. dataset = chooseDataset(ds_name)
  124. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  125. sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  126. try:
  127. graph_kernel = ConjugateGradient(
  128. node_labels=dataset.node_labels,
  129. node_attrs=dataset.node_attrs,
  130. edge_labels=dataset.edge_labels,
  131. edge_attrs=dataset.edge_attrs,
  132. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  133. weight=1e-3,
  134. p=None,
  135. q=None,
  136. edge_weight=None,
  137. node_kernels=sub_kernels,
  138. edge_kernels=sub_kernels)
  139. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  140. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  141. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  142. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  143. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  144. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  145. except Exception as exception:
  146. assert False, exception
  147. @pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS'])
  148. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  149. def test_FixedPoint(ds_name, parallel):
  150. """Test fixed point kernel.
  151. """
  152. from gklearn.kernels import FixedPoint
  153. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  154. import functools
  155. dataset = chooseDataset(ds_name)
  156. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  157. sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  158. try:
  159. graph_kernel = FixedPoint(
  160. node_labels=dataset.node_labels,
  161. node_attrs=dataset.node_attrs,
  162. edge_labels=dataset.edge_labels,
  163. edge_attrs=dataset.edge_attrs,
  164. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  165. weight=1e-3,
  166. p=None,
  167. q=None,
  168. edge_weight=None,
  169. node_kernels=sub_kernels,
  170. edge_kernels=sub_kernels)
  171. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  172. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  173. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  174. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  175. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  176. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  177. except Exception as exception:
  178. assert False, exception
  179. @pytest.mark.parametrize('ds_name', ['Acyclic'])
  180. @pytest.mark.parametrize('sub_kernel', ['exp', 'geo'])
  181. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  182. def test_SpectralDecomposition(ds_name, sub_kernel, parallel):
  183. """Test spectral decomposition kernel.
  184. """
  185. from gklearn.kernels import SpectralDecomposition
  186. dataset = chooseDataset(ds_name)
  187. try:
  188. graph_kernel = SpectralDecomposition(
  189. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  190. weight=1e-3,
  191. p=None,
  192. q=None,
  193. edge_weight=None,
  194. sub_kernel=sub_kernel)
  195. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  196. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  197. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  198. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  199. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  200. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  201. except Exception as exception:
  202. assert False, exception
  203. # @pytest.mark.parametrize(
  204. # 'compute_method,ds_name,sub_kernel',
  205. # [
  206. # ('sylvester', 'Alkane', None),
  207. # ('conjugate', 'Alkane', None),
  208. # ('conjugate', 'AIDS', None),
  209. # ('fp', 'Alkane', None),
  210. # ('fp', 'AIDS', None),
  211. # ('spectral', 'Alkane', 'exp'),
  212. # ('spectral', 'Alkane', 'geo'),
  213. # ]
  214. # )
  215. # @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  216. # def test_RandomWalk(ds_name, compute_method, sub_kernel, parallel):
  217. # """Test random walk kernel.
  218. # """
  219. # from gklearn.kernels import RandomWalk
  220. # from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  221. # import functools
  222. #
  223. # dataset = chooseDataset(ds_name)
  224. # mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  225. # sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  226. # # try:
  227. # graph_kernel = RandomWalk(node_labels=dataset.node_labels,
  228. # node_attrs=dataset.node_attrs,
  229. # edge_labels=dataset.edge_labels,
  230. # edge_attrs=dataset.edge_attrs,
  231. # ds_infos=dataset.get_dataset_infos(keys=['directed']),
  232. # compute_method=compute_method,
  233. # weight=1e-3,
  234. # p=None,
  235. # q=None,
  236. # edge_weight=None,
  237. # node_kernels=sub_kernels,
  238. # edge_kernels=sub_kernels,
  239. # sub_kernel=sub_kernel)
  240. # gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  241. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  242. # kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  243. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  244. # kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  245. # parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  246. # except Exception as exception:
  247. # assert False, exception
  248. @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
  249. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  250. def test_ShortestPath(ds_name, parallel):
  251. """Test shortest path kernel.
  252. """
  253. from gklearn.kernels import ShortestPath
  254. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  255. import functools
  256. dataset = chooseDataset(ds_name)
  257. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  258. sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  259. try:
  260. graph_kernel = ShortestPath(node_labels=dataset.node_labels,
  261. node_attrs=dataset.node_attrs,
  262. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  263. node_kernels=sub_kernels)
  264. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  265. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  266. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  267. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  268. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  269. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  270. except Exception as exception:
  271. assert False, exception
  272. #@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
  273. @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform'])
  274. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  275. def test_StructuralSP(ds_name, parallel):
  276. """Test structural shortest path kernel.
  277. """
  278. from gklearn.kernels import StructuralSP
  279. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  280. import functools
  281. dataset = chooseDataset(ds_name)
  282. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  283. sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  284. try:
  285. graph_kernel = StructuralSP(node_labels=dataset.node_labels,
  286. edge_labels=dataset.edge_labels,
  287. node_attrs=dataset.node_attrs,
  288. edge_attrs=dataset.edge_attrs,
  289. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  290. node_kernels=sub_kernels,
  291. edge_kernels=sub_kernels)
  292. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  293. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  294. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  295. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  296. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  297. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  298. except Exception as exception:
  299. assert False, exception
  300. @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  301. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  302. #@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto', None])
  303. @pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto'])
  304. @pytest.mark.parametrize('compute_method', ['trie', 'naive'])
  305. def test_PathUpToH(ds_name, parallel, k_func, compute_method):
  306. """Test path kernel up to length $h$.
  307. """
  308. from gklearn.kernels import PathUpToH
  309. dataset = chooseDataset(ds_name)
  310. try:
  311. graph_kernel = PathUpToH(node_labels=dataset.node_labels,
  312. edge_labels=dataset.edge_labels,
  313. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  314. depth=2, k_func=k_func, compute_method=compute_method)
  315. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  316. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  317. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  318. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  319. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  320. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  321. except Exception as exception:
  322. assert False, exception
  323. @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  324. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  325. def test_Treelet(ds_name, parallel):
  326. """Test treelet kernel.
  327. """
  328. from gklearn.kernels import Treelet
  329. from gklearn.utils.kernels import polynomialkernel
  330. import functools
  331. dataset = chooseDataset(ds_name)
  332. pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
  333. try:
  334. graph_kernel = Treelet(node_labels=dataset.node_labels,
  335. edge_labels=dataset.edge_labels,
  336. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  337. sub_kernel=pkernel)
  338. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  339. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  340. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  341. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  342. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  343. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  344. except Exception as exception:
  345. assert False, exception
  346. @pytest.mark.parametrize('ds_name', ['Acyclic'])
  347. #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge'])
  348. # @pytest.mark.parametrize('base_kernel', ['subtree'])
  349. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  350. def test_WLSubtree(ds_name, parallel):
  351. """Test Weisfeiler-Lehman subtree kernel.
  352. """
  353. from gklearn.kernels import WLSubtree
  354. dataset = chooseDataset(ds_name)
  355. try:
  356. graph_kernel = WLSubtree(node_labels=dataset.node_labels,
  357. edge_labels=dataset.edge_labels,
  358. ds_infos=dataset.get_dataset_infos(keys=['directed']),
  359. height=2)
  360. gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
  361. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  362. kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
  363. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  364. kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
  365. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  366. except Exception as exception:
  367. assert False, exception
  368. if __name__ == "__main__":
  369. # test_spkernel('Alkane', 'imap_unordered')
  370. # test_StructuralSP('Fingerprint_edge', 'imap_unordered')
  371. test_WLSubtree('Acyclic', 'imap_unordered')
  372. # test_RandomWalk('Acyclic', 'sylvester', None, 'imap_unordered')
  373. # test_RandomWalk('Acyclic', 'conjugate', None, 'imap_unordered')
  374. # test_RandomWalk('Acyclic', 'fp', None, None)
  375. # test_RandomWalk('Acyclic', 'spectral', 'exp', 'imap_unordered')

A Python package for graph kernels, graph edit distances and graph pre-image problem.