From d9e9fbbce44cc10268b4b19e9f7a6d61d6fa940b Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Mon, 2 Mar 2020 19:58:29 +0100 Subject: [PATCH] test all graph kernels. --- .gitignore | 1 + gklearn/kernels/untilHPathKernel.py | 6 +- gklearn/tests/requirements.txt | 1 + gklearn/tests/test_graphkernels.py | 241 ++++++++++++++++++++++++++++++++++-- 4 files changed, 234 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 852f2a5..ede9eb2 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ datasets/* !datasets/NCI109/ !datasets/AIDS/ !datasets/monoterpenoides/ +!datasets/Fingerprint/*.txt notebooks/results/* notebooks/check_gm/* notebooks/test_parallel/* diff --git a/gklearn/kernels/untilHPathKernel.py b/gklearn/kernels/untilHPathKernel.py index f8030f9..68ea7dc 100644 --- a/gklearn/kernels/untilHPathKernel.py +++ b/gklearn/kernels/untilHPathKernel.py @@ -31,7 +31,7 @@ def untilhpathkernel(*args, depth=10, k_func='MinMax', compute_method='trie', - parallel=True, + parallel='imap_unordered', n_jobs=None, verbose=True): """Calculate path graph kernels up to depth/hight h between graphs. @@ -177,7 +177,7 @@ def untilhpathkernel(*args, glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) elif parallel == None: - from pympler import asizeof +# from pympler import asizeof # ---- direct running, normally use single CPU core. 
---- # print(asizeof.asized(all_paths, detail=1).format()) @@ -231,7 +231,7 @@ def untilhpathkernel(*args, % (depth, len(Gn), run_time)) # print(Kmatrix[0][0:10]) - return Kmatrix, run_time, sizeof_allpaths + return Kmatrix, run_time def _untilhpathkernel_do_trie(trie1, trie2, k_func): diff --git a/gklearn/tests/requirements.txt b/gklearn/tests/requirements.txt index d0aa596..5a423e0 100644 --- a/gklearn/tests/requirements.txt +++ b/gklearn/tests/requirements.txt @@ -5,3 +5,4 @@ networkx scikit-learn tabulate tqdm +pytest diff --git a/gklearn/tests/test_graphkernels.py b/gklearn/tests/test_graphkernels.py index 17cd857..feaf7b2 100644 --- a/gklearn/tests/test_graphkernels.py +++ b/gklearn/tests/test_graphkernels.py @@ -1,32 +1,249 @@ """Tests of graph kernels. """ -#import pytest -from gklearn.utils.graphfiles import loadDataset +import pytest +import multiprocessing -def test_spkernel(): +def chooseDataset(ds_name): + """Choose dataset according to name. + """ + from gklearn.utils.graphfiles import loadDataset + + # no node labels (and no edge labels). + if ds_name == 'Alkane': + ds_file = 'datasets/Alkane/dataset.ds' + ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt' + Gn, y = loadDataset(ds_file, filename_y=ds_y) + # node symbolic labels. + elif ds_name == 'Acyclic': + ds_file = 'datasets/acyclic/dataset_bps.ds' + Gn, y = loadDataset(ds_file) + # node non-symbolic labels. + elif ds_name == 'Letter-med': + ds_file = 'datasets/Letter-med/Letter-med_A.txt' + Gn, y = loadDataset(ds_file) + # node symbolic and non-symbolic labels (and edge symbolic labels). + elif ds_name == 'AIDS': + ds_file = 'datasets/AIDS/AIDS_A.txt' + Gn, y = loadDataset(ds_file) + + # edge non-symbolic labels (and node non-symbolic labels). 
+ elif ds_name == 'Fingerprint': + ds_file = 'datasets/Fingerprint/Fingerprint_A.txt' + Gn, y = loadDataset(ds_file) + + Gn = Gn[0:10] + y = y[0:10] + + return Gn, y + + +@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) +@pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')]) +#@pytest.mark.parametrize('parallel', ['imap_unordered', None]) +def test_commonwalkkernel(ds_name, weight, compute_method): + """Test common walk kernel. + """ + from gklearn.kernels.commonWalkKernel import commonwalkkernel + + Gn, y = chooseDataset(ds_name) + + try: + Kmatrix, run_time, idx = commonwalkkernel(Gn, + node_label='atom', + edge_label='bond_type', + weight=weight, + compute_method=compute_method, +# parallel=parallel, + n_jobs=multiprocessing.cpu_count(), + verbose=True) + except Exception as exception: + assert False, exception + + +@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) +@pytest.mark.parametrize('remove_totters', [True, False]) +#@pytest.mark.parametrize('parallel', ['imap_unordered', None]) +def test_marginalizedkernel(ds_name, remove_totters): + """Test marginalized kernel. 
+ """ + from gklearn.kernels.marginalizedKernel import marginalizedkernel + + Gn, y = chooseDataset(ds_name) + + try: + Kmatrix, run_time = marginalizedkernel(Gn, + node_label='atom', + edge_label='bond_type', + p_quit=0.5, + n_iteration=2, + remove_totters=remove_totters, +# parallel=parallel, + n_jobs=multiprocessing.cpu_count(), + verbose=True) + except Exception as exception: + assert False, exception + + +@pytest.mark.parametrize( + 'compute_method,ds_name,sub_kernel', + [ +# ('sylvester', 'Alkane', None), +# ('conjugate', 'Alkane', None), +# ('conjugate', 'AIDS', None), +# ('fp', 'Alkane', None), +# ('fp', 'AIDS', None), + ('spectral', 'Alkane', 'exp'), + ('spectral', 'Alkane', 'geo'), + ] +) +#@pytest.mark.parametrize('parallel', ['imap_unordered', None]) +def test_randomwalkkernel(ds_name, compute_method, sub_kernel): + """Test random walk kernel. + """ + from gklearn.kernels.randomWalkKernel import randomwalkkernel + from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct + import functools + + Gn, y = chooseDataset(ds_name) + + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) + sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}] + try: + Kmatrix, run_time, idx = randomwalkkernel(Gn, + compute_method=compute_method, + weight=1e-3, + p=None, + q=None, + edge_weight=None, + node_kernels=sub_kernels, + edge_kernels=sub_kernels, + node_label='atom', + edge_label='bond_type', + sub_kernel=sub_kernel, +# parallel=parallel, + n_jobs=multiprocessing.cpu_count(), + verbose=True) + except Exception as exception: + assert False, exception + + +@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint']) +#@pytest.mark.parametrize('parallel', ['imap_unordered', None]) +@pytest.mark.parametrize('parallel', ['imap_unordered']) +def test_spkernel(ds_name, parallel): """Test shortest path kernel. 
""" from gklearn.kernels.spKernel import spkernel from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct import functools - import multiprocessing + + Gn, y = chooseDataset(ds_name) + + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) + sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} + try: + Kmatrix, run_time, idx = spkernel(Gn, node_label='atom', + node_kernels=sub_kernels, + parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + except Exception as exception: + assert False, exception - ds_file = 'datasets/Alkane/dataset.ds' - ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt' - Gn, y = loadDataset(ds_file, filename_y=ds_y) - Gn = Gn[0:10] - y = y[0:10] + +#@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint']) +@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS']) +@pytest.mark.parametrize('parallel', ['imap_unordered', None]) +def test_structuralspkernel(ds_name, parallel): + """Test structural shortest path kernel. 
+ """ + from gklearn.kernels.structuralspKernel import structuralspkernel + from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct + import functools + + Gn, y = chooseDataset(ds_name) mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) + sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} + try: + Kmatrix, run_time = structuralspkernel(Gn, node_label='atom', + edge_label='bond_type', node_kernels=sub_kernels, + edge_kernels=sub_kernels, + parallel=parallel, n_jobs=multiprocessing.cpu_count(), + verbose=True) + except Exception as exception: + assert False, exception + + +@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) +@pytest.mark.parametrize('parallel', ['imap_unordered', None]) +#@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto', None]) +@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto']) +@pytest.mark.parametrize('compute_method', ['trie', 'naive']) +def test_untilhpathkernel(ds_name, parallel, k_func, compute_method): + """Test path kernel up to length $h$. + """ + from gklearn.kernels.untilHPathKernel import untilhpathkernel + + Gn, y = chooseDataset(ds_name) + + try: + Kmatrix, run_time = untilhpathkernel(Gn, node_label='atom', + edge_label='bond_type', + depth=2, k_func=k_func, compute_method=compute_method, + parallel=parallel, + n_jobs=multiprocessing.cpu_count(), verbose=True) + except Exception as exception: + assert False, exception + + +@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) +@pytest.mark.parametrize('parallel', ['imap_unordered', None]) +def test_treeletkernel(ds_name, parallel): + """Test treelet kernel. 
+ """ + from gklearn.kernels.treeletKernel import treeletkernel + from gklearn.utils.kernels import polynomialkernel + import functools + + Gn, y = chooseDataset(ds_name) + + pkernel = functools.partial(polynomialkernel, d=2, c=1e5) try: - Kmatrix, run_time, idx = spkernel(Gn, node_label=None, node_kernels= - {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, - n_jobs=multiprocessing.cpu_count(), verbose=True) + Kmatrix, run_time = treeletkernel(Gn, + sub_kernel=pkernel, + node_label='atom', + edge_label='bond_type', + parallel=parallel, + n_jobs=multiprocessing.cpu_count(), + verbose=True) except Exception as exception: assert False, exception + + +@pytest.mark.parametrize('ds_name', ['Acyclic']) +#@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge']) +@pytest.mark.parametrize('base_kernel', ['subtree']) +@pytest.mark.parametrize('parallel', ['imap_unordered', None]) +def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel): + """Test Weisfeiler-Lehman kernel. + """ + from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel + Gn, y = chooseDataset(ds_name) + + try: + Kmatrix, run_time = weisfeilerlehmankernel(Gn, + node_label='atom', + edge_label='bond_type', + height=2, + base_kernel=base_kernel, + parallel=parallel, + n_jobs=multiprocessing.cpu_count(), + verbose=True) + except Exception as exception: + assert False, exception + if __name__ == "__main__": test_spkernel() \ No newline at end of file