
test all graph kernels.

v0.1 · jajupmochi · 5 years ago · commit d9e9fbbce4
4 changed files with 234 additions and 15 deletions:

  1. .gitignore (+1 -0)
  2. gklearn/kernels/untilHPathKernel.py (+3 -3)
  3. gklearn/tests/requirements.txt (+1 -0)
  4. gklearn/tests/test_graphkernels.py (+229 -12)

.gitignore (+1 -0)

@@ -14,6 +14,7 @@ datasets/*
 !datasets/NCI109/
 !datasets/AIDS/
 !datasets/monoterpenoides/
+!datasets/Fingerprint/*.txt
 notebooks/results/*
 notebooks/check_gm/*
 notebooks/test_parallel/*


gklearn/kernels/untilHPathKernel.py (+3 -3)

@@ -31,7 +31,7 @@ def untilhpathkernel(*args,
                      depth=10,
                      k_func='MinMax',
                      compute_method='trie',
-                     parallel=True,
+                     parallel='imap_unordered',
                      n_jobs=None,
                      verbose=True):
     """Calculate path graph kernels up to depth/hight h between graphs.
@@ -177,7 +177,7 @@ def untilhpathkernel(*args,
                 glbv=(all_paths,), n_jobs=n_jobs, verbose=verbose)
     elif parallel == None:
-        from pympler import asizeof
+        # from pympler import asizeof
         # ---- direct running, normally use single CPU core. ----
         # print(asizeof.asized(all_paths, detail=1).format())
@@ -231,7 +231,7 @@ def untilhpathkernel(*args,
               % (depth, len(Gn), run_time))
 
     # print(Kmatrix[0][0:10])
-    return Kmatrix, run_time, sizeof_allpaths
+    return Kmatrix, run_time


def _untilhpathkernel_do_trie(trie1, trie2, k_func):
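
With this change, untilhpathkernel defaults to 'imap_unordered' parallelization and now returns only the kernel matrix and the run time (sizeof_allpaths is no longer returned). Below is a minimal sketch of a call against the updated signature; the dataset paths mirror the ones used in gklearn/tests/test_graphkernels.py further down, and the parameter values are only an example, not something prescribed by this commit:

    # Sketch: calling the updated untilhpathkernel on a small slice of Alkane.
    import multiprocessing
    from gklearn.utils.graphfiles import loadDataset
    from gklearn.kernels.untilHPathKernel import untilhpathkernel

    Gn, y = loadDataset('datasets/Alkane/dataset.ds',
                        filename_y='datasets/Alkane/dataset_boiling_point_names.txt')

    # Two return values now, instead of three.
    Kmatrix, run_time = untilhpathkernel(Gn[0:10], node_label='atom',
                                         edge_label='bond_type', depth=2,
                                         k_func='MinMax', compute_method='trie',
                                         parallel='imap_unordered',
                                         n_jobs=multiprocessing.cpu_count(),
                                         verbose=True)
    print('run time:', run_time)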


gklearn/tests/requirements.txt (+1 -0)

@@ -5,3 +5,4 @@ networkx
 scikit-learn
 tabulate
 tqdm
+pytest
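
With pytest now listed as a test requirement, the new suite can be run directly. A minimal sketch of a programmatic invocation follows; running pytest on the module from the repository root works just as well, and the -v flag is only a convenience, not something this commit prescribes:

    # Sketch: run the graph-kernel tests programmatically with pytest.
    import pytest

    exit_code = pytest.main(['-v', 'gklearn/tests/test_graphkernels.py'])
    print('pytest exit code:', exit_code)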

gklearn/tests/test_graphkernels.py (+229 -12)

@@ -1,32 +1,249 @@
"""Tests of graph kernels.
"""

#import pytest
from gklearn.utils.graphfiles import loadDataset
import pytest
import multiprocessing


def test_spkernel():
def chooseDataset(ds_name):
"""Choose dataset according to name.
"""
from gklearn.utils.graphfiles import loadDataset

# no node labels (and no edge labels).
if ds_name == 'Alkane':
ds_file = 'datasets/Alkane/dataset.ds'
ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt'
Gn, y = loadDataset(ds_file, filename_y=ds_y)
# node symbolic labels.
elif ds_name == 'Acyclic':
ds_file = 'datasets/acyclic/dataset_bps.ds'
Gn, y = loadDataset(ds_file)
# node non-symbolic labels.
elif ds_name == 'Letter-med':
ds_file = 'datasets/Letter-med/Letter-med_A.txt'
Gn, y = loadDataset(ds_file)
# node symbolic and non-symbolic labels (and edge symbolic labels).
elif ds_name == 'AIDS':
ds_file = 'datasets/AIDS/AIDS_A.txt'
Gn, y = loadDataset(ds_file)
# edge non-symbolic labels (and node non-symbolic labels).
elif ds_name == 'Fingerprint':
ds_file = 'datasets/Fingerprint/Fingerprint_A.txt'
Gn, y = loadDataset(ds_file)
Gn = Gn[0:10]
y = y[0:10]
return Gn, y


+@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
+@pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
+#@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_commonwalkkernel(ds_name, weight, compute_method):
+    """Test common walk kernel.
+    """
+    from gklearn.kernels.commonWalkKernel import commonwalkkernel
+    Gn, y = chooseDataset(ds_name)
+
+    try:
+        Kmatrix, run_time, idx = commonwalkkernel(Gn,
+                                                  node_label='atom',
+                                                  edge_label='bond_type',
+                                                  weight=weight,
+                                                  compute_method=compute_method,
+                                                  # parallel=parallel,
+                                                  n_jobs=multiprocessing.cpu_count(),
+                                                  verbose=True)
+    except Exception as exception:
+        assert False, exception
+
+
+@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
+@pytest.mark.parametrize('remove_totters', [True, False])
+#@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_marginalizedkernel(ds_name, remove_totters):
+    """Test marginalized kernel.
+    """
+    from gklearn.kernels.marginalizedKernel import marginalizedkernel
+    Gn, y = chooseDataset(ds_name)
+
+    try:
+        Kmatrix, run_time = marginalizedkernel(Gn,
+                                               node_label='atom',
+                                               edge_label='bond_type',
+                                               p_quit=0.5,
+                                               n_iteration=2,
+                                               remove_totters=remove_totters,
+                                               # parallel=parallel,
+                                               n_jobs=multiprocessing.cpu_count(),
+                                               verbose=True)
+    except Exception as exception:
+        assert False, exception
+
+
+@pytest.mark.parametrize(
+    'compute_method,ds_name,sub_kernel',
+    [
+        # ('sylvester', 'Alkane', None),
+        # ('conjugate', 'Alkane', None),
+        # ('conjugate', 'AIDS', None),
+        # ('fp', 'Alkane', None),
+        # ('fp', 'AIDS', None),
+        ('spectral', 'Alkane', 'exp'),
+        ('spectral', 'Alkane', 'geo'),
+    ]
+)
+#@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_randomwalkkernel(ds_name, compute_method, sub_kernel):
+    """Test random walk kernel.
+    """
+    from gklearn.kernels.randomWalkKernel import randomwalkkernel
+    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+    import functools
+    Gn, y = chooseDataset(ds_name)
+
+    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+    sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
+    try:
+        Kmatrix, run_time, idx = randomwalkkernel(Gn,
+                                                  compute_method=compute_method,
+                                                  weight=1e-3,
+                                                  p=None,
+                                                  q=None,
+                                                  edge_weight=None,
+                                                  node_kernels=sub_kernels,
+                                                  edge_kernels=sub_kernels,
+                                                  node_label='atom',
+                                                  edge_label='bond_type',
+                                                  sub_kernel=sub_kernel,
+                                                  # parallel=parallel,
+                                                  n_jobs=multiprocessing.cpu_count(),
+                                                  verbose=True)
+    except Exception as exception:
+        assert False, exception
+

+@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
+#@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+@pytest.mark.parametrize('parallel', ['imap_unordered'])
+def test_spkernel(ds_name, parallel):
     """Test shortest path kernel.
     """
     from gklearn.kernels.spKernel import spkernel
     from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
     import functools
     import multiprocessing
+    Gn, y = chooseDataset(ds_name)
     mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+    try:
+        Kmatrix, run_time, idx = spkernel(Gn, node_label='atom',
+                                          node_kernels=sub_kernels,
+                                          parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+    except Exception as exception:
+        assert False, exception
 
-    ds_file = 'datasets/Alkane/dataset.ds'
-    ds_y = 'datasets/Alkane/dataset_boiling_point_names.txt'
-    Gn, y = loadDataset(ds_file, filename_y=ds_y)
-    Gn = Gn[0:10]
-    y = y[0:10]
 
+
+#@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
+@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS'])
+@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_structuralspkernel(ds_name, parallel):
+    """Test structural shortest path kernel.
+    """
+    from gklearn.kernels.structuralspKernel import structuralspkernel
+    from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+    import functools
+    Gn, y = chooseDataset(ds_name)
+    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
+    try:
+        Kmatrix, run_time = structuralspkernel(Gn, node_label='atom',
+                                               edge_label='bond_type', node_kernels=sub_kernels,
+                                               edge_kernels=sub_kernels,
+                                               parallel=parallel, n_jobs=multiprocessing.cpu_count(),
+                                               verbose=True)
+    except Exception as exception:
+        assert False, exception


+@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
+@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+#@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto', None])
+@pytest.mark.parametrize('k_func', ['MinMax', 'tanimoto'])
+@pytest.mark.parametrize('compute_method', ['trie', 'naive'])
+def test_untilhpathkernel(ds_name, parallel, k_func, compute_method):
+    """Test path kernel up to length $h$.
+    """
+    from gklearn.kernels.untilHPathKernel import untilhpathkernel
+    Gn, y = chooseDataset(ds_name)
+    try:
+        Kmatrix, run_time = untilhpathkernel(Gn, node_label='atom',
+                                             edge_label='bond_type',
+                                             depth=2, k_func=k_func, compute_method=compute_method,
+                                             parallel=parallel,
+                                             n_jobs=multiprocessing.cpu_count(), verbose=True)
+    except Exception as exception:
+        assert False, exception
+
+
+@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
+@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_treeletkernel(ds_name, parallel):
+    """Test treelet kernel.
+    """
+    from gklearn.kernels.treeletKernel import treeletkernel
+    from gklearn.utils.kernels import polynomialkernel
+    import functools
+    Gn, y = chooseDataset(ds_name)
+
+    pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
     try:
-        Kmatrix, run_time, idx = spkernel(Gn, node_label=None, node_kernels=
-            {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
-            n_jobs=multiprocessing.cpu_count(), verbose=True)
+        Kmatrix, run_time = treeletkernel(Gn,
+                                          sub_kernel=pkernel,
+                                          node_label='atom',
+                                          edge_label='bond_type',
+                                          parallel=parallel,
+                                          n_jobs=multiprocessing.cpu_count(),
+                                          verbose=True)
     except Exception as exception:
         assert False, exception
+
+
+@pytest.mark.parametrize('ds_name', ['Acyclic'])
+#@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge'])
+@pytest.mark.parametrize('base_kernel', ['subtree'])
+@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_weisfeilerlehmankernel(ds_name, parallel, base_kernel):
+    """Test Weisfeiler-Lehman kernel.
+    """
+    from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
+    Gn, y = chooseDataset(ds_name)
+
+    try:
+        Kmatrix, run_time = weisfeilerlehmankernel(Gn,
+                                                   node_label='atom',
+                                                   edge_label='bond_type',
+                                                   height=2,
+                                                   base_kernel=base_kernel,
+                                                   parallel=parallel,
+                                                   n_jobs=multiprocessing.cpu_count(),
+                                                   verbose=True)
+    except Exception as exception:
+        assert False, exception

 if __name__ == "__main__":
     test_spkernel()
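
Note that the parametrized test_spkernel now requires ds_name and parallel arguments, so the unchanged __main__ block above would raise a TypeError if the file is run as a plain script. A minimal sketch of a direct invocation outside pytest, using one parameter combination taken from the decorators (chosen arbitrarily here):

    # Sketch: run a single test case directly, without pytest collecting it.
    if __name__ == "__main__":
        test_spkernel('Alkane', 'imap_unordered')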
