@@ -8,6 +8,7 @@ __author__ = "Linlin Jia"
 __date__ = "November 2018"

 from gklearn.kernels.graph_kernel import GraphKernel
+from gklearn.kernels.common_walk import CommonWalk
 from gklearn.kernels.marginalized import Marginalized
 from gklearn.kernels.shortest_path import ShortestPath
 from gklearn.kernels.structural_sp import StructuralSP

@@ -0,0 +1,282 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 18 11:21:31 2020

@author: ljia

@references:

    [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels:
    Hardness results and efficient alternatives. Learning Theory and Kernel
    Machines, pages 129–143, 2003.
"""

import sys
from itertools import combinations_with_replacement
from tqdm import tqdm
import numpy as np
import networkx as nx
from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import direct_product_graph
from gklearn.kernels import GraphKernel


class CommonWalk(GraphKernel):

    def __init__(self, **kwargs):
        GraphKernel.__init__(self)
        self.__node_labels = kwargs.get('node_labels', [])
        self.__edge_labels = kwargs.get('edge_labels', [])
        self.__weight = kwargs.get('weight', 1)
        self.__compute_method = kwargs.get('compute_method', None)
        self.__ds_infos = kwargs.get('ds_infos', {})
        # guard against the None default before normalizing the method name.
        if self.__compute_method is not None:
            self.__compute_method = self.__compute_method.lower()

    def _compute_gm_series(self):
        self.__check_graphs(self._graphs)
        self.__add_dummy_labels(self._graphs)
        if not self.__ds_infos['directed']:  # convert undirected graphs to directed.
            self._graphs = [G.to_directed() for G in self._graphs]

        # compute Gram matrix.
        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

        itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
        if self._verbose >= 2:
            iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
        else:
            iterator = itr

        # direct product graph method - exponential
        if self.__compute_method == 'exp':
            for i, j in iterator:
                kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight)
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel
        # direct product graph method - geometric
        elif self.__compute_method == 'geo':
            for i, j in iterator:
                kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight)
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel

        return gram_matrix

    def _compute_gm_imap_unordered(self):
        self.__check_graphs(self._graphs)
        self.__add_dummy_labels(self._graphs)
        if not self.__ds_infos['directed']:  # convert undirected graphs to directed.
            self._graphs = [G.to_directed() for G in self._graphs]

        # compute Gram matrix.
        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

        def init_worker(gn_toshare):
            global G_gn
            G_gn = gn_toshare

        # direct product graph method - exponential
        if self.__compute_method == 'exp':
            do_fun = self._wrapper_kernel_do_exp
        # direct product graph method - geometric
        elif self.__compute_method == 'geo':
            do_fun = self._wrapper_kernel_do_geo

        parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
                    glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)

        return gram_matrix

    def _compute_kernel_list_series(self, g1, g_list):
        self.__check_graphs(g_list + [g1])
        self.__add_dummy_labels(g_list + [g1])
        if not self.__ds_infos['directed']:  # convert undirected graphs to directed.
            g1 = g1.to_directed()
            g_list = [G.to_directed() for G in g_list]

        # compute kernel list.
        kernel_list = [None] * len(g_list)

        if self._verbose >= 2:
            iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
        else:
            iterator = range(len(g_list))

        # direct product graph method - exponential
        if self.__compute_method == 'exp':
            for i in iterator:
                kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight)
                kernel_list[i] = kernel
        # direct product graph method - geometric
        elif self.__compute_method == 'geo':
            for i in iterator:
                kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight)
                kernel_list[i] = kernel

        return kernel_list

    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        self.__check_graphs(g_list + [g1])
        self.__add_dummy_labels(g_list + [g1])
        if not self.__ds_infos['directed']:  # convert undirected graphs to directed.
            g1 = g1.to_directed()
            g_list = [G.to_directed() for G in g_list]

        # compute kernel list.
        kernel_list = [None] * len(g_list)

        def init_worker(g1_toshare, g_list_toshare):
            global G_g1, G_g_list
            G_g1 = g1_toshare
            G_g_list = g_list_toshare

        # direct product graph method - exponential
        if self.__compute_method == 'exp':
            do_fun = self._wrapper_kernel_list_do_exp
        # direct product graph method - geometric
        elif self.__compute_method == 'geo':
            do_fun = self._wrapper_kernel_list_do_geo

        def func_assign(result, var_to_assign):
            var_to_assign[result[0]] = result[1]

        itr = range(len(g_list))
        len_itr = len(g_list)
        parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                    init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
                    n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)

        return kernel_list

    def _wrapper_kernel_list_do_exp(self, itr):
        return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight)

    def _wrapper_kernel_list_do_geo(self, itr):
        return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight)

    def _compute_single_kernel_series(self, g1, g2):
        self.__check_graphs([g1] + [g2])
        self.__add_dummy_labels([g1] + [g2])
        if not self.__ds_infos['directed']:  # convert undirected graphs to directed.
            g1 = g1.to_directed()
            g2 = g2.to_directed()

        # direct product graph method - exponential
        if self.__compute_method == 'exp':
            kernel = self.__kernel_do_exp(g1, g2, self.__weight)
        # direct product graph method - geometric
        elif self.__compute_method == 'geo':
            kernel = self.__kernel_do_geo(g1, g2, self.__weight)

        return kernel

    def __kernel_do_exp(self, g1, g2, beta):
        """Compute the common walk kernel between two graphs using the
        exponential series.

        Parameters
        ----------
        g1, g2 : NetworkX graphs
            Graphs between which the kernel is computed.
        beta : float
            The weight (decay factor) of the exponential series.

        Return
        ------
        kernel : float
            The common walk kernel between the two graphs.
        """
        # get the direct (tensor) product graph.
        gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
        # return 0 if the direct product graph has fewer than 2 nodes.
        if nx.number_of_nodes(gp) < 2:
            return 0
        A = nx.adjacency_matrix(gp).todense()

        ew, ev = np.linalg.eig(A)
        # # remove imaginary part if possible.
        # # @todo: don't know if it is necessary.
        # for i in range(len(ew)):
        #     if np.abs(ew[i].imag) < 1e-9:
        #         ew[i] = ew[i].real
        # for i in range(ev.shape[0]):
        #     for j in range(ev.shape[1]):
        #         if np.abs(ev[i, j].imag) < 1e-9:
        #             ev[i, j] = ev[i, j].real
        D = np.zeros((len(ew), len(ew)), dtype=complex)  # @todo: use complex?
        for i in range(len(ew)):
            D[i][i] = np.exp(beta * ew[i])
        exp_D = ev * D * ev.T
        kernel = exp_D.sum()
        if (kernel.real == 0 and np.abs(kernel.imag) < 1e-9) or np.abs(kernel.imag / kernel.real) < 1e-9:
            kernel = kernel.real

        return kernel
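
    # Note on the computation above: since both inputs are undirected graphs
    # converted to directed ones, the adjacency matrix A of their direct
    # product graph is symmetric, so A = V D V^T with (nearly) orthogonal V,
    # and the loop builds exp(beta * A) = V exp(beta * D) V^T; the kernel is
    # the sum over all entries of that matrix. Assuming SciPy were available,
    # the same value could be obtained, up to numerical error, as
    # scipy.linalg.expm(beta * A).sum().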

    def _wrapper_kernel_do_exp(self, itr):
        i = itr[0]
        j = itr[1]
        return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight)

    def __kernel_do_geo(self, g1, g2, gamma):
        """Compute the common walk kernel between two graphs using the
        geometric series.

        Parameters
        ----------
        g1, g2 : NetworkX graphs
            Graphs between which the kernel is computed.
        gamma : float
            The weight (decay factor) of the geometric series.

        Return
        ------
        kernel : float
            The common walk kernel between the two graphs.
        """
        # get the direct (tensor) product graph.
        gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
        # return 0 if the direct product graph has fewer than 2 nodes.
        if nx.number_of_nodes(gp) < 2:
            return 0
        A = nx.adjacency_matrix(gp).todense()
        mat = np.identity(len(A)) - gamma * A
        # try:
        return mat.I.sum()
        # except np.linalg.LinAlgError:
        #     return np.nan
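
    # Note on the computation above: the geometric variant sums the series
    # I + gamma * A + gamma^2 * A^2 + ... = (I - gamma * A)^{-1} entrywise
    # over the direct product graph. The series converges only when gamma is
    # smaller than the reciprocal of the spectral radius of A, which is why
    # small weights (e.g. the 0.01 used in the tests) are the safe choice.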

    def _wrapper_kernel_do_geo(self, itr):
        i = itr[0]
        j = itr[1]
        return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight)

    def __check_graphs(self, Gn):
        for g in Gn:
            if nx.number_of_nodes(g) < 2:
                raise Exception('Graphs must contain more than 1 node to construct adjacency matrices.')

    def __add_dummy_labels(self, Gn):
        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
            for i in range(len(Gn)):
                nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
            self.__node_labels = [SpecialLabel.DUMMY]
        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
            for i in range(len(Gn)):
                nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
            self.__edge_labels = [SpecialLabel.DUMMY]
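

# A minimal usage sketch (illustrative, not part of the original module),
# assuming two toy molecule-like graphs with 'atom' node labels and
# 'bond_type' edge labels, mirroring the parameters used in the test suite:
if __name__ == '__main__':
    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'O'})])
    g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    g2.add_edges_from([(0, 1, {'bond_type': '1'})])
    graph_kernel = CommonWalk(node_labels=['atom'], edge_labels=['bond_type'],
                              ds_infos={'directed': False},
                              weight=0.01, compute_method='geo')
    # compute the single kernel between the two graphs in series mode.
    kernel, run_time = graph_kernel.compute(g1, g2, parallel=None,
                                            n_jobs=1, verbose=0)
    print(kernel, run_time)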

@@ -10,6 +10,7 @@ import networkx as nx
 import multiprocessing
 import time


 class GraphKernel(object):

     def __init__(self):
@@ -51,7 +51,7 @@ class Marginalized(GraphKernel):
             else:
                 iterator = self._graphs
             # @todo: this may not work.
-            self._graphs = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
+            self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -108,13 +108,13 @@ class Marginalized(GraphKernel):
         self.__add_dummy_labels(g_list + [g1])

         if self.__remove_totters:
-            g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
+            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
             if self._verbose >= 2:
                 iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
             else:
                 iterator = g_list
             # @todo: this may not work.
-            g_list = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
+            g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]

         # compute kernel list.
         kernel_list = [None] * len(g_list)
@@ -133,7 +133,7 @@ class Marginalized(GraphKernel):
         self.__add_dummy_labels(g_list + [g1])

         if self.__remove_totters:
-            g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
+            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
         pool = Pool(self._n_jobs)
         itr = range(0, len(g_list))
         if len(g_list) < 100 * self._n_jobs:
@@ -177,8 +177,8 @@ class Marginalized(GraphKernel):
     def _compute_single_kernel_series(self, g1, g2):
         self.__add_dummy_labels([g1] + [g2])
         if self.__remove_totters:
-            g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
-            g2 = untotterTransformation(g2, self.__node_label, self.__edge_label)
+            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
+            g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
         kernel = self.__kernel_do(g1, g2)
         return kernel
@@ -324,7 +324,7 @@ class Marginalized(GraphKernel):
     def _wrapper_untotter(self, i):
-        return i, untotterTransformation(self._graphs[i], self.__node_label, self.__edge_label) # @todo: this may not work.
+        return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels) # @todo: this may not work.

     def __add_dummy_labels(self, Gn):
@@ -52,94 +52,104 @@ def chooseDataset(ds_name):
    return dataset


# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
# @pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_commonwalkkernel(ds_name, weight, compute_method):
#     """Test common walk kernel.
#     """
#     from gklearn.kernels.commonWalkKernel import commonwalkkernel
@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_CommonWalk(ds_name, parallel, weight, compute_method):
    """Test common walk kernel.
    """
    from gklearn.kernels import CommonWalk
    import networkx as nx

    dataset = chooseDataset(ds_name)
    dataset.load_graphs([g for g in dataset.graphs if nx.number_of_nodes(g) > 1])
#     Gn, y = chooseDataset(ds_name)

    try:
        graph_kernel = CommonWalk(node_labels=dataset.node_labels,
                                  edge_labels=dataset.edge_labels,
                                  ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                  weight=weight,
                                  compute_method=compute_method)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
#     try:
#         Kmatrix, run_time, idx = commonwalkkernel(Gn,
#                                                   node_label='atom',
#                                                   edge_label='bond_type',
#                                                   weight=weight,
#                                                   compute_method=compute_method,
# #                                                  parallel=parallel,
#                                                   n_jobs=multiprocessing.cpu_count(),
#                                                   verbose=True)
#     except Exception as exception:
#         assert False, exception
    except Exception as exception:
        assert False, exception


# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
# @pytest.mark.parametrize('remove_totters', [True, False])
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_marginalizedkernel(ds_name, remove_totters):
#     """Test marginalized kernel.
#     """
#     from gklearn.kernels.marginalizedKernel import marginalizedkernel
@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('remove_totters', [False]) #[True, False])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_Marginalized(ds_name, parallel, remove_totters):
    """Test marginalized kernel.
    """
    from gklearn.kernels import Marginalized
#     Gn, y = chooseDataset(ds_name)
    dataset = chooseDataset(ds_name)

    try:
        graph_kernel = Marginalized(node_labels=dataset.node_labels,
                                    edge_labels=dataset.edge_labels,
                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                    p_quit=0.5,
                                    n_iteration=2,
                                    remove_totters=remove_totters)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
#     try:
#         Kmatrix, run_time = marginalizedkernel(Gn,
#                                                node_label='atom',
#                                                edge_label='bond_type',
#                                                p_quit=0.5,
#                                                n_iteration=2,
#                                                remove_totters=remove_totters,
# #                                               parallel=parallel,
#                                                n_jobs=multiprocessing.cpu_count(),
#                                                verbose=True)
#     except Exception as exception:
#         assert False, exception
    except Exception as exception:
        assert False, exception


# @pytest.mark.parametrize(
#         'compute_method,ds_name,sub_kernel',
#         [
# #            ('sylvester', 'Alkane', None),
# #            ('conjugate', 'Alkane', None),
# #            ('conjugate', 'AIDS', None),
# #            ('fp', 'Alkane', None),
# #            ('fp', 'AIDS', None),
#             ('spectral', 'Alkane', 'exp'),
#             ('spectral', 'Alkane', 'geo'),
#         ]
# )
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_randomwalkkernel(ds_name, compute_method, sub_kernel):
#     """Test random walk kernel.
#     """
#     from gklearn.kernels.randomWalkKernel import randomwalkkernel
#     from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
#     import functools
#
#     Gn, y = chooseDataset(ds_name)
#
#     mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
#     sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
#     try:
#         Kmatrix, run_time, idx = randomwalkkernel(Gn,
#                                                   compute_method=compute_method,
#                                                   weight=1e-3,
#                                                   p=None,
#                                                   q=None,
#                                                   edge_weight=None,
#                                                   node_kernels=sub_kernels,
#                                                   edge_kernels=sub_kernels,
#                                                   node_label='atom',
#                                                   edge_label='bond_type',
#                                                   sub_kernel=sub_kernel,
# #                                                  parallel=parallel,
#                                                   n_jobs=multiprocessing.cpu_count(),
#                                                   verbose=True)
#     except Exception as exception:
#         assert False, exception


@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@@ -157,9 +167,9 @@ def test_ShortestPath(ds_name, parallel):
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
    try:
        graph_kernel = ShortestPath(node_labels=dataset.node_labels,
                                    node_attrs=dataset.node_attrs,
                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                    node_kernels=sub_kernels)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
@@ -187,12 +197,12 @@ def test_StructuralSP(ds_name, parallel):
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
    try:
        graph_kernel = StructuralSP(node_labels=dataset.node_labels,
                                    edge_labels=dataset.edge_labels,
                                    node_attrs=dataset.node_attrs,
                                    edge_attrs=dataset.edge_attrs,
                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                    node_kernels=sub_kernels,
                                    edge_kernels=sub_kernels)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
@@ -218,9 +228,9 @@ def test_PathUpToH(ds_name, parallel, k_func, compute_method):
    try:
        graph_kernel = PathUpToH(node_labels=dataset.node_labels,
                                 edge_labels=dataset.edge_labels,
                                 ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                 depth=2, k_func=k_func, compute_method=compute_method)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
@@ -245,9 +255,9 @@ def test_Treelet(ds_name, parallel):
    pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
    try:
        graph_kernel = Treelet(node_labels=dataset.node_labels,
                               edge_labels=dataset.edge_labels,
                               ds_infos=dataset.get_dataset_infos(keys=['directed']),
                               sub_kernel=pkernel)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
@@ -271,9 +281,9 @@ def test_WLSubtree(ds_name, parallel):
    try:
        graph_kernel = WLSubtree(node_labels=dataset.node_labels,
                                 edge_labels=dataset.edge_labels,
                                 ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                 height=2)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)

@@ -222,6 +222,70 @@ def direct_product(G1, G2, node_label, edge_label):
    return gt


def direct_product_graph(G1, G2, node_labels, edge_labels):
    """Return the direct/tensor product of directed graphs G1 and G2.

    Parameters
    ----------
    G1, G2 : NetworkX graph
        The original graphs.
    node_labels : list
        A list of node attributes used as labels.
    edge_labels : list
        A list of edge attributes used as labels.

    Return
    ------
    gt : NetworkX graph
        The direct product graph of G1 and G2.

    Notes
    -----
    This method differs from networkx.tensor_product in that it only adds to
    the direct product graph those nodes and edges of G1 and G2 whose labels
    are identical.

    References
    ----------
    .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels:
       Hardness results and efficient alternatives. Learning Theory and
       Kernel Machines, pages 129–143, 2003.
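
    Examples
    --------
    A minimal illustrative sketch (the 'atom' and 'bond_type' label names are
    hypothetical): two one-edge digraphs with matching labels yield a product
    graph with two nodes and one edge.

    >>> import networkx as nx
    >>> g1 = nx.DiGraph()
    >>> g1.add_node(0, atom='C'); g1.add_node(1, atom='O')
    >>> g1.add_edge(0, 1, bond_type='1')
    >>> g2 = nx.DiGraph()
    >>> g2.add_node(0, atom='C'); g2.add_node(1, atom='O')
    >>> g2.add_edge(0, 1, bond_type='1')
    >>> gt = direct_product_graph(g1, g2, ['atom'], ['bond_type'])
    >>> sorted(gt.nodes)
    [(0, 0), (1, 1)]
    >>> sorted(gt.edges)
    [((0, 0), (1, 1))]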
""" | |||||
    from itertools import product
    # G = G.to_directed()
    gt = nx.DiGraph()
    # add nodes
    for u, v in product(G1, G2):
        label1 = tuple(G1.nodes[u][nl] for nl in node_labels)
        label2 = tuple(G2.nodes[v][nl] for nl in node_labels)
        if label1 == label2:
            gt.add_node((u, v), node_label=label1)

    # add edges; faster for sparse graphs (not so many edges), which is the most common case for now.
    for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
        if (u1, u2) in gt and (v1, v2) in gt:
            label1 = tuple(G1.edges[u1, v1][el] for el in edge_labels)
            label2 = tuple(G2.edges[u2, v2][el] for el in edge_labels)
            if label1 == label2:
                gt.add_edge((u1, u2), (v1, v2), edge_label=label1)

    # # add edges, faster for dense graphs (a lot of edges, complete graph would be super).
    # for u, v in product(gt, gt):
    #     if (u[0], v[0]) in G1.edges and (u[1], v[1]) in G2.edges and \
    #             G1.edges[u[0], v[0]][edge_label] == G2.edges[u[1], v[1]][edge_label]:
    #         gt.add_edge((u[0], u[1]), (v[0], v[1]))
    #         gt.edges[(u[0], u[1]), (v[0], v[1])].update({
    #             edge_label: G1.edges[u[0], v[0]][edge_label]})

    # relabel nodes using consecutive integers for convenience of kernel calculation.
    # gt = nx.convert_node_labels_to_integers(
    #     gt, first_label=0, label_attribute='label_orignal')

    return gt


def graph_deepcopy(G):
    """Deep copy a graph, including deep copy of all nodes, edges and
    attributes of the graph, nodes and edges.