
test_graph_kernels.py

  1. """Tests of graph kernels.
  2. """
  3. import pytest
  4. import multiprocessing
  5. def chooseDataset(ds_name):
  6. """Choose dataset according to name.
  7. """
  8. from gklearn.utils import Dataset
  9. dataset = Dataset()
  10. # no node labels (and no edge labels).
  11. if ds_name == 'Alkane':
  12. dataset.load_predefined_dataset(ds_name)
  13. dataset.trim_dataset(edge_required=False)
  14. irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
  15. dataset.remove_labels(**irrelevant_labels)
  16. # node symbolic labels.
  17. elif ds_name == 'Acyclic':
  18. dataset.load_predefined_dataset(ds_name)
  19. dataset.trim_dataset(edge_required=False)
  20. irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
  21. dataset.remove_labels(**irrelevant_labels)
  22. # node non-symbolic labels.
  23. elif ds_name == 'Letter-med':
  24. dataset.load_predefined_dataset(ds_name)
  25. dataset.trim_dataset(edge_required=False)
  26. # node symbolic and non-symbolic labels (and edge symbolic labels).
  27. elif ds_name == 'AIDS':
  28. dataset.load_predefined_dataset(ds_name)
  29. dataset.trim_dataset(edge_required=False)
  30. # edge non-symbolic labels (no node labels).
  31. elif ds_name == 'Fingerprint_edge':
  32. dataset.load_predefined_dataset('Fingerprint')
  33. dataset.trim_dataset(edge_required=True)
  34. irrelevant_labels = {'edge_attrs': ['orient', 'angle']}
  35. dataset.remove_labels(**irrelevant_labels)
  36. # edge non-symbolic labels (and node non-symbolic labels).
  37. elif ds_name == 'Fingerprint':
  38. dataset.load_predefined_dataset(ds_name)
  39. dataset.trim_dataset(edge_required=True)
  40. # edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels).
  41. elif ds_name == 'Cuneiform':
  42. dataset.load_predefined_dataset(ds_name)
  43. dataset.trim_dataset(edge_required=True)
  44. dataset.cut_graphs(range(0, 3))
  45. return dataset
def test_list_graph_kernels():
    """Test list_of_graph_kernels.
    """
    from gklearn.kernels import GRAPH_KERNELS, list_of_graph_kernels
    assert list_of_graph_kernels() == [i for i in GRAPH_KERNELS]

@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_CommonWalk(ds_name, parallel, weight, compute_method):
    """Test common walk kernel.
    """
    from gklearn.kernels import CommonWalk
    import networkx as nx

    dataset = chooseDataset(ds_name)
    dataset.load_graphs([g for g in dataset.graphs if nx.number_of_nodes(g) > 1])

    try:
        graph_kernel = CommonWalk(node_labels=dataset.node_labels,
                                  edge_labels=dataset.edge_labels,
                                  ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                  weight=weight,
                                  compute_method=compute_method)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('remove_totters', [False])  # [True, False])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_Marginalized(ds_name, parallel, remove_totters):
    """Test marginalized kernel.
    """
    from gklearn.kernels import Marginalized

    dataset = chooseDataset(ds_name)

    try:
        graph_kernel = Marginalized(node_labels=dataset.node_labels,
                                    edge_labels=dataset.edge_labels,
                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                    p_quit=0.5,
                                    n_iteration=2,
                                    remove_totters=remove_totters)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


@pytest.mark.parametrize('ds_name', ['Acyclic'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_SylvesterEquation(ds_name, parallel):
    """Test sylvester equation kernel.
    """
    from gklearn.kernels import SylvesterEquation

    dataset = chooseDataset(ds_name)

    try:
        graph_kernel = SylvesterEquation(
            ds_infos=dataset.get_dataset_infos(keys=['directed']),
            weight=1e-3,
            p=None,
            q=None,
            edge_weight=None)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


@pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_ConjugateGradient(ds_name, parallel):
    """Test conjugate gradient kernel.
    """
    from gklearn.kernels import ConjugateGradient
    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
    import functools

    dataset = chooseDataset(ds_name)

    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

    try:
        graph_kernel = ConjugateGradient(
            node_labels=dataset.node_labels,
            node_attrs=dataset.node_attrs,
            edge_labels=dataset.edge_labels,
            edge_attrs=dataset.edge_attrs,
            ds_infos=dataset.get_dataset_infos(keys=['directed']),
            weight=1e-3,
            p=None,
            q=None,
            edge_weight=None,
            node_kernels=sub_kernels,
            edge_kernels=sub_kernels)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


@pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_FixedPoint(ds_name, parallel):
    """Test fixed point kernel.
    """
    from gklearn.kernels import FixedPoint
    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
    import functools

    dataset = chooseDataset(ds_name)

    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

    try:
        graph_kernel = FixedPoint(
            node_labels=dataset.node_labels,
            node_attrs=dataset.node_attrs,
            edge_labels=dataset.edge_labels,
            edge_attrs=dataset.edge_attrs,
            ds_infos=dataset.get_dataset_infos(keys=['directed']),
            weight=1e-3,
            p=None,
            q=None,
            edge_weight=None,
            node_kernels=sub_kernels,
            edge_kernels=sub_kernels)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


@pytest.mark.parametrize('ds_name', ['Acyclic'])
@pytest.mark.parametrize('sub_kernel', ['exp', 'geo'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_SpectralDecomposition(ds_name, sub_kernel, parallel):
    """Test spectral decomposition kernel.
    """
    from gklearn.kernels import SpectralDecomposition

    dataset = chooseDataset(ds_name)

    try:
        graph_kernel = SpectralDecomposition(
            ds_infos=dataset.get_dataset_infos(keys=['directed']),
            weight=1e-3,
            p=None,
            q=None,
            edge_weight=None,
            sub_kernel=sub_kernel)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception

# @pytest.mark.parametrize(
#     'compute_method,ds_name,sub_kernel',
#     [
#         ('sylvester', 'Alkane', None),
#         ('conjugate', 'Alkane', None),
#         ('conjugate', 'AIDS', None),
#         ('fp', 'Alkane', None),
#         ('fp', 'AIDS', None),
#         ('spectral', 'Alkane', 'exp'),
#         ('spectral', 'Alkane', 'geo'),
#     ]
# )
# @pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_RandomWalk(ds_name, compute_method, sub_kernel, parallel):
#     """Test random walk kernel.
#     """
#     from gklearn.kernels import RandomWalk
#     from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
#     import functools
#
#     dataset = chooseDataset(ds_name)
#
#     mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
#     sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
#     # try:
#     graph_kernel = RandomWalk(node_labels=dataset.node_labels,
#                               node_attrs=dataset.node_attrs,
#                               edge_labels=dataset.edge_labels,
#                               edge_attrs=dataset.edge_attrs,
#                               ds_infos=dataset.get_dataset_infos(keys=['directed']),
#                               compute_method=compute_method,
#                               weight=1e-3,
#                               p=None,
#                               q=None,
#                               edge_weight=None,
#                               node_kernels=sub_kernels,
#                               edge_kernels=sub_kernels,
#                               sub_kernel=sub_kernel)
#     gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
#         parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
#     kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
#         parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
#     kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
#         parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
#     except Exception as exception:
#         assert False, exception

@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_ShortestPath(ds_name, parallel):
    """Test shortest path kernel.
    """
    from gklearn.kernels import ShortestPath
    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
    import functools

    dataset = chooseDataset(ds_name)

    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

    try:
        graph_kernel = ShortestPath(node_labels=dataset.node_labels,
                                    node_attrs=dataset.node_attrs,
                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                    node_kernels=sub_kernels)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


# @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_StructuralSP(ds_name, parallel):
    """Test structural shortest path kernel.
    """
    from gklearn.kernels import StructuralSP
    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
    import functools

    dataset = chooseDataset(ds_name)

    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

    try:
        graph_kernel = StructuralSP(node_labels=dataset.node_labels,
                                    edge_labels=dataset.edge_labels,
                                    node_attrs=dataset.node_attrs,
                                    edge_attrs=dataset.edge_attrs,
                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                    node_kernels=sub_kernels,
                                    edge_kernels=sub_kernels)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# @pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto', None])
@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto'])
@pytest.mark.parametrize('compute_method', ['trie', 'naive'])
def test_PathUpToH(ds_name, parallel, k_func, compute_method):
    """Test path kernel up to length $h$.
    """
    from gklearn.kernels import PathUpToH

    dataset = chooseDataset(ds_name)

    try:
        graph_kernel = PathUpToH(node_labels=dataset.node_labels,
                                 edge_labels=dataset.edge_labels,
                                 ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                 depth=2, k_func=k_func, compute_method=compute_method)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_Treelet(ds_name, parallel):
    """Test treelet kernel.
    """
    from gklearn.kernels import Treelet
    from gklearn.utils.kernels import polynomialkernel
    import functools

    dataset = chooseDataset(ds_name)

    pkernel = functools.partial(polynomialkernel, d=2, c=1e5)

    try:
        graph_kernel = Treelet(node_labels=dataset.node_labels,
                               edge_labels=dataset.edge_labels,
                               ds_infos=dataset.get_dataset_infos(keys=['directed']),
                               sub_kernel=pkernel)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


@pytest.mark.parametrize('ds_name', ['Acyclic'])
# @pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge'])
# @pytest.mark.parametrize('base_kernel', ['subtree'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_WLSubtree(ds_name, parallel):
    """Test Weisfeiler-Lehman subtree kernel.
    """
    from gklearn.kernels import WLSubtree

    dataset = chooseDataset(ds_name)

    try:
        graph_kernel = WLSubtree(node_labels=dataset.node_labels,
                                 edge_labels=dataset.edge_labels,
                                 ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                 height=2)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
    except Exception as exception:
        assert False, exception


if __name__ == "__main__":
    test_list_graph_kernels()
    # test_spkernel('Alkane', 'imap_unordered')
    # test_StructuralSP('Fingerprint_edge', 'imap_unordered')
    # test_WLSubtree('Acyclic', 'imap_unordered')
    # test_RandomWalk('Acyclic', 'sylvester', None, 'imap_unordered')
    # test_RandomWalk('Acyclic', 'conjugate', None, 'imap_unordered')
    # test_RandomWalk('Acyclic', 'fp', None, None)
    # test_RandomWalk('Acyclic', 'spectral', 'exp', 'imap_unordered')

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.
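Each test above follows the same three-step pattern: load a predefined dataset, build a kernel object from the dataset's label and attribute metadata, then call compute() on a list of graphs (Gram matrix), on one graph against a list, or on a single pair. The sketch below pulls that pattern out of the pytest harness. It assumes gklearn is installed with its predefined datasets available; the 'Acyclic' dataset and the ShortestPath parameters are illustrative choices copied from the tests, not recommendations.

# Minimal usage sketch (assumptions noted above; not part of the test suite).
import functools
import multiprocessing

from gklearn.utils import Dataset
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from gklearn.kernels import ShortestPath

# Load a small predefined dataset and drop labels the kernel does not use,
# mirroring chooseDataset('Acyclic') above.
dataset = Dataset()
dataset.load_predefined_dataset('Acyclic')
dataset.trim_dataset(edge_required=False)
dataset.remove_labels(node_attrs=['x', 'y', 'z'], edge_labels=['bond_stereo'])
dataset.cut_graphs(range(0, 3))

# Node sub-kernels for symbolic, non-symbolic, and mixed labels, as in the tests.
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

graph_kernel = ShortestPath(node_labels=dataset.node_labels,
                            node_attrs=dataset.node_attrs,
                            ds_infos=dataset.get_dataset_infos(keys=['directed']),
                            node_kernels=sub_kernels)

# Gram matrix over all graphs, then one kernel value between the first two graphs.
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
    parallel='imap_unordered', n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
    parallel=None, n_jobs=multiprocessing.cpu_count(), verbose=True)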