You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

test_graphkernels.py 12 kB


  1. """Tests of graph kernels.
  2. """
  3. import pytest
  4. import multiprocessing
  5. def chooseDataset(ds_name):
  6. """Choose dataset according to name.
  7. """
  8. from gklearn.utils.graphfiles import loadDataset
  9. # no node labels (and no edge labels).
  10. if ds_name == 'Alkane':
  11. ds_file = 'datasets/Alkane/dataset.ds'
  12. ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt'
  13. Gn, y = loadDataset(ds_file, filename_y=ds_y)
  14. for G in Gn:
  15. for node in G.nodes:
  16. del G.nodes[node]['attributes']
  17. # node symbolic labels.
  18. elif ds_name == 'Acyclic':
  19. ds_file = 'datasets/acyclic/dataset_bps.ds'
  20. Gn, y = loadDataset(ds_file)
  21. for G in Gn:
  22. for node in G.nodes:
  23. del G.nodes[node]['attributes']
  24. # node non-symbolic labels.
  25. elif ds_name == 'Letter-med':
  26. ds_file = 'datasets/Letter-med/Letter-med_A.txt'
  27. Gn, y = loadDataset(ds_file)
  28. # node symbolic and non-symbolic labels (and edge symbolic labels).
  29. elif ds_name == 'AIDS':
  30. ds_file = 'datasets/AIDS/AIDS_A.txt'
  31. Gn, y = loadDataset(ds_file)
  32. # edge non-symbolic labels (no node labels).
  33. elif ds_name == 'Fingerprint_edge':
  34. import networkx as nx
  35. ds_file = 'datasets/Fingerprint/Fingerprint_A.txt'
  36. Gn, y = loadDataset(ds_file)
  37. Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
  38. idx = [G[0] for G in Gn]
  39. Gn = [G[1] for G in Gn]
  40. y = [y[i] for i in idx]
  41. for G in Gn:
  42. G.graph['node_attrs'] = []
  43. for node in G.nodes:
  44. del G.nodes[node]['attributes']
  45. del G.nodes[node]['x']
  46. del G.nodes[node]['y']
  47. # edge non-symbolic labels (and node non-symbolic labels).
  48. elif ds_name == 'Fingerprint':
  49. import networkx as nx
  50. ds_file = 'datasets/Fingerprint/Fingerprint_A.txt'
  51. Gn, y = loadDataset(ds_file)
  52. Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
  53. idx = [G[0] for G in Gn]
  54. Gn = [G[1] for G in Gn]
  55. y = [y[i] for i in idx]
  56. # edge symbolic and non-symbolic labels (and node symbolic and non-symbolic labels).
  57. elif ds_name == 'Cuneiform':
  58. import networkx as nx
  59. ds_file = 'datasets/Cuneiform/Cuneiform_A.txt'
  60. Gn, y = loadDataset(ds_file)
  61. Gn = Gn[0:3]
  62. y = y[0:3]
  63. return Gn, y
  64. @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  65. @pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
  66. #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
  67. def test_commonwalkkernel(ds_name, weight, compute_method):
  68. """Test common walk kernel.
  69. """
  70. from gklearn.kernels.commonWalkKernel import commonwalkkernel
  71. Gn, y = chooseDataset(ds_name)
  72. try:
  73. Kmatrix, run_time, idx = commonwalkkernel(Gn,
  74. node_label='atom',
  75. edge_label='bond_type',
  76. weight=weight,
  77. compute_method=compute_method,
  78. # parallel=parallel,
  79. n_jobs=multiprocessing.cpu_count(),
  80. verbose=True)
  81. except Exception as exception:
  82. assert False, exception
  83. @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  84. @pytest.mark.parametrize('remove_totters', [True, False])
  85. #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
  86. def test_marginalizedkernel(ds_name, remove_totters):
  87. """Test marginalized kernel.
  88. """
  89. from gklearn.kernels.marginalizedKernel import marginalizedkernel
  90. Gn, y = chooseDataset(ds_name)
  91. try:
  92. Kmatrix, run_time = marginalizedkernel(Gn,
  93. node_label='atom',
  94. edge_label='bond_type',
  95. p_quit=0.5,
  96. n_iteration=2,
  97. remove_totters=remove_totters,
  98. # parallel=parallel,
  99. n_jobs=multiprocessing.cpu_count(),
  100. verbose=True)
  101. except Exception as exception:
  102. assert False, exception
  103. @pytest.mark.parametrize(
  104. 'compute_method,ds_name,sub_kernel',
  105. [
  106. # ('sylvester', 'Alkane', None),
  107. # ('conjugate', 'Alkane', None),
  108. # ('conjugate', 'AIDS', None),
  109. # ('fp', 'Alkane', None),
  110. # ('fp', 'AIDS', None),
  111. ('spectral', 'Alkane', 'exp'),
  112. ('spectral', 'Alkane', 'geo'),
  113. ]
  114. )
  115. #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
  116. def test_randomwalkkernel(ds_name, compute_method, sub_kernel):
  117. """Test random walk kernel kernel.
  118. """
  119. from gklearn.kernels.randomWalkKernel import randomwalkkernel
  120. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  121. import functools
  122. Gn, y = chooseDataset(ds_name)
  123. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  124. sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
  125. try:
  126. Kmatrix, run_time, idx = randomwalkkernel(Gn,
  127. compute_method=compute_method,
  128. weight=1e-3,
  129. p=None,
  130. q=None,
  131. edge_weight=None,
  132. node_kernels=sub_kernels,
  133. edge_kernels=sub_kernels,
  134. node_label='atom',
  135. edge_label='bond_type',
  136. sub_kernel=sub_kernel,
  137. # parallel=parallel,
  138. n_jobs=multiprocessing.cpu_count(),
  139. verbose=True)
  140. except Exception as exception:
  141. assert False, exception
  142. @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
  143. #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
  144. @pytest.mark.parametrize('parallel', ['imap_unordered'])
  145. def test_spkernel(ds_name, parallel):
  146. """Test shortest path kernel.
  147. """
  148. from gklearn.kernels.spKernel import spkernel
  149. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  150. import functools
  151. Gn, y = chooseDataset(ds_name)
  152. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  153. sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  154. try:
  155. Kmatrix, run_time, idx = spkernel(Gn, node_label='atom',
  156. node_kernels=sub_kernels,
  157. parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
  158. except Exception as exception:
  159. assert False, exception
  160. #@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
  161. @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint', 'Fingerprint_edge', 'Cuneiform'])
  162. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  163. def test_structuralspkernel(ds_name, parallel):
  164. """Test structural shortest path kernel.
  165. """
  166. from gklearn.kernels.structuralspKernel import structuralspkernel
  167. from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  168. import functools
  169. Gn, y = chooseDataset(ds_name)
  170. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  171. sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
  172. try:
  173. Kmatrix, run_time = structuralspkernel(Gn, node_label='atom',
  174. edge_label='bond_type', node_kernels=sub_kernels,
  175. edge_kernels=sub_kernels,
  176. parallel=parallel, n_jobs=multiprocessing.cpu_count(),
  177. verbose=True)
  178. except Exception as exception:
  179. assert False, exception
  180. @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  181. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  182. #@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto', None])
  183. @pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto'])
  184. @pytest.mark.parametrize('compute_method', ['trie', 'naive'])
  185. def test_untilhpathkernel(ds_name, parallel, k_func, compute_method):
  186. """Test path kernel up to length $h$.
  187. """
  188. from gklearn.kernels.untilHPathKernel import untilhpathkernel
  189. Gn, y = chooseDataset(ds_name)
  190. try:
  191. Kmatrix, run_time = untilhpathkernel(Gn, node_label='atom',
  192. edge_label='bond_type',
  193. depth=2, k_func=k_func, compute_method=compute_method,
  194. parallel=parallel,
  195. n_jobs=multiprocessing.cpu_count(), verbose=True)
  196. except Exception as exception:
  197. assert False, exception
  198. @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
  199. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  200. def test_treeletkernel(ds_name, parallel):
  201. """Test treelet kernel.
  202. """
  203. from gklearn.kernels.treeletKernel import treeletkernel
  204. from gklearn.utils.kernels import polynomialkernel
  205. import functools
  206. Gn, y = chooseDataset(ds_name)
  207. pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
  208. try:
  209. Kmatrix, run_time = treeletkernel(Gn,
  210. sub_kernel=pkernel,
  211. node_label='atom',
  212. edge_label='bond_type',
  213. parallel=parallel,
  214. n_jobs=multiprocessing.cpu_count(),
  215. verbose=True)
  216. except Exception as exception:
  217. assert False, exception
  218. @pytest.mark.parametrize('ds_name', ['Acyclic'])
  219. #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge'])
  220. @pytest.mark.parametrize('base_kernel', ['subtree'])
  221. @pytest.mark.parametrize('parallel', ['imap_unordered', None])
  222. def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel):
  223. """Test Weisfeiler-Lehman kernel.
  224. """
  225. from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
  226. Gn, y = chooseDataset(ds_name)
  227. try:
  228. Kmatrix, run_time = weisfeilerlehmankernel(Gn,
  229. node_label='atom',
  230. edge_label='bond_type',
  231. height=2,
  232. base_kernel=base_kernel,
  233. parallel=parallel,
  234. n_jobs=multiprocessing.cpu_count(),
  235. verbose=True)
  236. except Exception as exception:
  237. assert False, exception
  238. if __name__ == "__main__":
  239. # test_spkernel('Alkane', 'imap_unordered')
  240. test_structuralspkernel('Fingerprint_edge', 'imap_unordered')

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.