Browse Source

test all graph kernels.

v0.1
jajupmochi 5 years ago
parent
commit
d9e9fbbce4
4 changed files with 234 additions and 15 deletions
  1. +1
    -0
      .gitignore
  2. +3
    -3
      gklearn/kernels/untilHPathKernel.py
  3. +1
    -0
      gklearn/tests/requirements.txt
  4. +229
    -12
      gklearn/tests/test_graphkernels.py

+ 1
- 0
.gitignore View File

@@ -14,6 +14,7 @@ datasets/*
!datasets/NCI109/ !datasets/NCI109/
!datasets/AIDS/ !datasets/AIDS/
!datasets/monoterpenoides/ !datasets/monoterpenoides/
!datasets/Fingerprint/*.txt
notebooks/results/* notebooks/results/*
notebooks/check_gm/* notebooks/check_gm/*
notebooks/test_parallel/* notebooks/test_parallel/*


+ 3
- 3
gklearn/kernels/untilHPathKernel.py View File

@@ -31,7 +31,7 @@ def untilhpathkernel(*args,
depth=10, depth=10,
k_func='MinMax', k_func='MinMax',
compute_method='trie', compute_method='trie',
parallel=True,
parallel='imap_unordered',
n_jobs=None, n_jobs=None,
verbose=True): verbose=True):
"""Calculate path graph kernels up to depth/hight h between graphs. """Calculate path graph kernels up to depth/hight h between graphs.
@@ -177,7 +177,7 @@ def untilhpathkernel(*args,
glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose) glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose)
elif parallel == None: elif parallel == None:
from pympler import asizeof
# from pympler import asizeof
# ---- direct running, normally use single CPU core. ---- # ---- direct running, normally use single CPU core. ----
# print(asizeof.asized(all_paths, detail=1).format()) # print(asizeof.asized(all_paths, detail=1).format())
@@ -231,7 +231,7 @@ def untilhpathkernel(*args,
% (depth, len(Gn), run_time)) % (depth, len(Gn), run_time))


# print(Kmatrix[0][0:10]) # print(Kmatrix[0][0:10])
return Kmatrix, run_time, sizeof_allpaths
return Kmatrix, run_time




def _untilhpathkernel_do_trie(trie1, trie2, k_func): def _untilhpathkernel_do_trie(trie1, trie2, k_func):


+ 1
- 0
gklearn/tests/requirements.txt View File

@@ -5,3 +5,4 @@ networkx
scikit-learn scikit-learn
tabulate tabulate
tqdm tqdm
pytest

+ 229
- 12
gklearn/tests/test_graphkernels.py View File

@@ -1,32 +1,249 @@
"""Tests of graph kernels. """Tests of graph kernels.
""" """


#import pytest
from gklearn.utils.graphfiles import loadDataset
import pytest
import multiprocessing




def test_spkernel():
def chooseDataset(ds_name):
    """Choose dataset according to name.

    Loads the dataset registered under ``ds_name`` with ``loadDataset`` and
    keeps only its first 10 graphs and targets so that the tests stay fast.

    Parameters
    ----------
    ds_name : str
        One of 'Alkane', 'Acyclic', 'Letter-med', 'AIDS' or 'Fingerprint'.

    Returns
    -------
    Gn : list
        The first 10 graphs returned by ``loadDataset``.
    y : list
        The first 10 targets returned by ``loadDataset``.

    Raises
    ------
    ValueError
        If ``ds_name`` is not one of the known dataset names.
    """
    # Dataset name -> (dataset file, extra keyword arguments of loadDataset).
    datasets = {
        # no node labels (and no edge labels).
        'Alkane': (
            'datasets/Alkane/dataset.ds',
            {'filename_y': 'datasets/Alkane/dataset_boiling_point_names.txt'},
        ),
        # node symbolic labels.
        'Acyclic': ('datasets/acyclic/dataset_bps.ds', {}),
        # node non-symbolic labels.
        'Letter-med': ('datasets/Letter-med/Letter-med_A.txt', {}),
        # node symbolic and non-symbolic labels (and edge symbolic labels).
        'AIDS': ('datasets/AIDS/AIDS_A.txt', {}),
        # edge non-symbolic labels (and node non-symbolic labels).
        'Fingerprint': ('datasets/Fingerprint/Fingerprint_A.txt', {}),
    }

    # Fail early with a clear message instead of an UnboundLocalError on Gn.
    try:
        ds_file, load_kwargs = datasets[ds_name]
    except KeyError:
        raise ValueError(
            'Unknown dataset name %r; expected one of %s.'
            % (ds_name, sorted(datasets))) from None

    from gklearn.utils.graphfiles import loadDataset

    Gn, y = loadDataset(ds_file, **load_kwargs)
    return Gn[0:10], y[0:10]


@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
#@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_commonwalkkernel(ds_name, weight, compute_method):
    """Test common walk kernel.

    Runs ``commonwalkkernel`` on the graphs returned by ``chooseDataset`` for
    every (weight, compute_method) pair. The test passes when the call
    completes and its result unpacks into three values.
    """
    from gklearn.kernels.commonWalkKernel import commonwalkkernel
    Gn, y = chooseDataset(ds_name)

    # Any exception is left to propagate: pytest then reports the original
    # traceback, and the failure cannot be hidden by running with ``-O``
    # (which strips ``assert`` statements).
    Kmatrix, run_time, idx = commonwalkkernel(
        Gn,
        node_label='atom',
        edge_label='bond_type',
        weight=weight,
        compute_method=compute_method,
        # parallel=parallel,
        n_jobs=multiprocessing.cpu_count(),
        verbose=True)
@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('remove_totters', [True, False])
#@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_marginalizedkernel(ds_name, remove_totters):
    """Test marginalized kernel.

    Runs ``marginalizedkernel`` on the graphs returned by ``chooseDataset``
    with and without totter removal. The test passes when the call completes
    and its result unpacks into two values.
    """
    from gklearn.kernels.marginalizedKernel import marginalizedkernel
    Gn, y = chooseDataset(ds_name)

    # Any exception is left to propagate: pytest then reports the original
    # traceback, and the failure cannot be hidden by running with ``-O``
    # (which strips ``assert`` statements).
    Kmatrix, run_time = marginalizedkernel(
        Gn,
        node_label='atom',
        edge_label='bond_type',
        p_quit=0.5,
        n_iteration=2,
        remove_totters=remove_totters,
        # parallel=parallel,
        n_jobs=multiprocessing.cpu_count(),
        verbose=True)
@pytest.mark.parametrize(
    'compute_method,ds_name,sub_kernel',
    [
#        ('sylvester', 'Alkane', None),
#        ('conjugate', 'Alkane', None),
#        ('conjugate', 'AIDS', None),
#        ('fp', 'Alkane', None),
#        ('fp', 'AIDS', None),
        ('spectral', 'Alkane', 'exp'),
        ('spectral', 'Alkane', 'geo'),
    ]
)
#@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_randomwalkkernel(ds_name, compute_method, sub_kernel):
    """Test random walk kernel.

    Runs ``randomwalkkernel`` on the graphs returned by ``chooseDataset`` for
    each enabled (compute_method, ds_name, sub_kernel) combination. The test
    passes when the call completes and its result unpacks into three values.
    """
    from gklearn.kernels.randomWalkKernel import randomwalkkernel
    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
    import functools
    Gn, y = chooseDataset(ds_name)

    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    # NOTE(review): the other tests in this file pass a plain dict here, not a
    # list wrapping a dict -- confirm which form randomwalkkernel expects.
    sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]

    # Any exception is left to propagate: pytest then reports the original
    # traceback, and the failure cannot be hidden by running with ``-O``
    # (which strips ``assert`` statements).
    Kmatrix, run_time, idx = randomwalkkernel(
        Gn,
        compute_method=compute_method,
        weight=1e-3,
        p=None,
        q=None,
        edge_weight=None,
        node_kernels=sub_kernels,
        edge_kernels=sub_kernels,
        node_label='atom',
        edge_label='bond_type',
        sub_kernel=sub_kernel,
        # parallel=parallel,
        n_jobs=multiprocessing.cpu_count(),
        verbose=True)

@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
#@pytest.mark.parametrize('parallel', ['imap_unordered', None])
@pytest.mark.parametrize('parallel', ['imap_unordered'])
def test_spkernel(ds_name, parallel):
    """Test shortest path kernel.

    Runs ``spkernel`` on the graphs returned by ``chooseDataset`` for every
    listed dataset. The test passes when the call completes and its result
    unpacks into three values.
    """
    from gklearn.kernels.spKernel import spkernel
    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
    import functools
    Gn, y = chooseDataset(ds_name)

    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

    # Any exception is left to propagate: pytest then reports the original
    # traceback, and the failure cannot be hidden by running with ``-O``
    # (which strips ``assert`` statements).
    Kmatrix, run_time, idx = spkernel(
        Gn,
        node_label='atom',
        node_kernels=sub_kernels,
        parallel=parallel,
        n_jobs=multiprocessing.cpu_count(),
        verbose=True)


ds_file = 'datasets/Alkane/dataset.ds'
ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt'
Gn, y = loadDataset(ds_file, filename_y=ds_y)
Gn = Gn[0:10]
y = y[0:10]

#@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_structuralspkernel(ds_name, parallel):
    """Test structural shortest path kernel.

    Runs ``structuralspkernel`` on the graphs returned by ``chooseDataset``,
    both with ``parallel='imap_unordered'`` and with ``parallel=None``. The
    test passes when the call completes and its result unpacks into two
    values.
    """
    from gklearn.kernels.structuralspKernel import structuralspkernel
    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
    import functools
    Gn, y = chooseDataset(ds_name)

    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

    # Any exception is left to propagate: pytest then reports the original
    # traceback, and the failure cannot be hidden by running with ``-O``
    # (which strips ``assert`` statements).
    Kmatrix, run_time = structuralspkernel(
        Gn,
        node_label='atom',
        edge_label='bond_type',
        node_kernels=sub_kernels,
        edge_kernels=sub_kernels,
        parallel=parallel,
        n_jobs=multiprocessing.cpu_count(),
        verbose=True)


@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
#@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto', None])
@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto'])
@pytest.mark.parametrize('compute_method', ['trie', 'naive'])
def test_untilhpathkernel(ds_name, parallel, k_func, compute_method):
    """Test path kernel up to length $h$.

    Runs ``untilhpathkernel`` with ``depth=2`` on the graphs returned by
    ``chooseDataset`` for every combination of the parametrized arguments.
    The test passes when the call completes and its result unpacks into two
    values.
    """
    from gklearn.kernels.untilHPathKernel import untilhpathkernel
    Gn, y = chooseDataset(ds_name)

    # Any exception is left to propagate: pytest then reports the original
    # traceback, and the failure cannot be hidden by running with ``-O``
    # (which strips ``assert`` statements).
    Kmatrix, run_time = untilhpathkernel(
        Gn,
        node_label='atom',
        edge_label='bond_type',
        depth=2,
        k_func=k_func,
        compute_method=compute_method,
        parallel=parallel,
        n_jobs=multiprocessing.cpu_count(),
        verbose=True)
@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_treeletkernel(ds_name, parallel):
    """Test treelet kernel.

    Runs ``treeletkernel`` with a polynomial sub-kernel (``d=2``, ``c=1e5``)
    on the graphs returned by ``chooseDataset``. The test passes when the
    call completes and its result unpacks into two values.
    """
    from gklearn.kernels.treeletKernel import treeletkernel
    from gklearn.utils.kernels import polynomialkernel
    import functools
    Gn, y = chooseDataset(ds_name)

    pkernel = functools.partial(polynomialkernel, d=2, c=1e5)

    # Any exception is left to propagate: pytest then reports the original
    # traceback, and the failure cannot be hidden by running with ``-O``
    # (which strips ``assert`` statements).
    Kmatrix, run_time = treeletkernel(
        Gn,
        sub_kernel=pkernel,
        node_label='atom',
        edge_label='bond_type',
        parallel=parallel,
        n_jobs=multiprocessing.cpu_count(),
        verbose=True)
@pytest.mark.parametrize('ds_name', ['Acyclic'])
#@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge'])
@pytest.mark.parametrize('base_kernel', ['subtree'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel):
    """Test Weisfeiler-Lehman kernel.

    Runs ``weisfeilerlehmankernel`` with ``height=2`` on the graphs returned
    by ``chooseDataset``. The test passes when the call completes and its
    result unpacks into two values.
    """
    from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
    Gn, y = chooseDataset(ds_name)

    # Any exception is left to propagate: pytest then reports the original
    # traceback, and the failure cannot be hidden by running with ``-O``
    # (which strips ``assert`` statements).
    Kmatrix, run_time = weisfeilerlehmankernel(
        Gn,
        node_label='atom',
        edge_label='bond_type',
        height=2,
        base_kernel=base_kernel,
        parallel=parallel,
        n_jobs=multiprocessing.cpu_count(),
        verbose=True)


if __name__ == "__main__":
    # test_spkernel takes (ds_name, parallel), so it cannot be called without
    # arguments; run one representative configuration when executed as a
    # script.
    test_spkernel('Alkane', 'imap_unordered')

Loading…
Cancel
Save