diff --git a/gklearn/kernels/__init__.py b/gklearn/kernels/__init__.py
index e642043..9cba06d 100644
--- a/gklearn/kernels/__init__.py
+++ b/gklearn/kernels/__init__.py
@@ -8,6 +8,7 @@ __author__ = "Linlin Jia"
 __date__ = "November 2018"
 
 from gklearn.kernels.graph_kernel import GraphKernel
+from gklearn.kernels.common_walk import CommonWalk
 from gklearn.kernels.marginalized import Marginalized
 from gklearn.kernels.shortest_path import ShortestPath
 from gklearn.kernels.structural_sp import StructuralSP
diff --git a/gklearn/kernels/common_walk.py b/gklearn/kernels/common_walk.py
new file mode 100644
index 0000000..b892b5c
--- /dev/null
+++ b/gklearn/kernels/common_walk.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Aug 18 11:21:31 2020
+
+@author: ljia
+
+@references:
+
+	[1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels:
+	Hardness results and efficient alternatives. Learning Theory and Kernel
+	Machines, pages 129–143, 2003.
+"""
+
+import sys
+from tqdm import tqdm
+import numpy as np
+import networkx as nx
+from gklearn.utils import SpecialLabel
+from gklearn.utils.parallel import parallel_gm, parallel_me
+from gklearn.utils.utils import direct_product_graph
+from gklearn.kernels import GraphKernel
+
+
+class CommonWalk(GraphKernel):
+
+	def __init__(self, **kwargs):
+		GraphKernel.__init__(self)
+		self.__node_labels = kwargs.get('node_labels', [])
+		self.__edge_labels = kwargs.get('edge_labels', [])
+		self.__weight = kwargs.get('weight', 1)
+		self.__compute_method = kwargs.get('compute_method', None)
+		self.__ds_infos = kwargs.get('ds_infos', {})
+		if self.__compute_method is not None:  # guard the None default against AttributeError.
+			self.__compute_method = self.__compute_method.lower()
+
+
+	def _compute_gm_series(self):
+		self.__check_graphs(self._graphs)
+		self.__add_dummy_labels(self._graphs)
+		if not self.__ds_infos['directed']:  # convert
+			self._graphs = [G.to_directed() for G in self._graphs]
+
+		# compute Gram matrix.
+		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+		from itertools import combinations_with_replacement
+		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
+		if self._verbose >= 2:
+			iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
+		else:
+			iterator = itr
+
+		# direct product graph method - exponential
+		if self.__compute_method == 'exp':
+			for i, j in iterator:
+				kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight)
+				gram_matrix[i][j] = kernel
+				gram_matrix[j][i] = kernel
+		# direct product graph method - geometric
+		elif self.__compute_method == 'geo':
+			for i, j in iterator:
+				kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight)
+				gram_matrix[i][j] = kernel
+				gram_matrix[j][i] = kernel
+
+		return gram_matrix
+
+
+	def _compute_gm_imap_unordered(self):
+		self.__check_graphs(self._graphs)
+		self.__add_dummy_labels(self._graphs)
+		if not self.__ds_infos['directed']:  # convert
+			self._graphs = [G.to_directed() for G in self._graphs]
+
+		# compute Gram matrix.
+		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+		def init_worker(gn_toshare):
+			global G_gn
+			G_gn = gn_toshare
+
+		# direct product graph method - exponential
+		if self.__compute_method == 'exp':
+			do_fun = self._wrapper_kernel_do_exp
+		# direct product graph method - geometric
+		elif self.__compute_method == 'geo':
+			do_fun = self._wrapper_kernel_do_geo
+
+		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
+					glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+
+		return gram_matrix
+
+
+	def _compute_kernel_list_series(self, g1, g_list):
+		self.__check_graphs(g_list + [g1])
+		self.__add_dummy_labels(g_list + [g1])
+		if not self.__ds_infos['directed']:  # convert
+			g1 = g1.to_directed()
+			g_list = [G.to_directed() for G in g_list]
+
+		# compute kernel list.
+		kernel_list = [None] * len(g_list)
+		if self._verbose >= 2:
+			iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
+		else:
+			iterator = range(len(g_list))
+
+		# direct product graph method - exponential
+		if self.__compute_method == 'exp':
+			for i in iterator:
+				kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight)
+				kernel_list[i] = kernel
+		# direct product graph method - geometric
+		elif self.__compute_method == 'geo':
+			for i in iterator:
+				kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight)
+				kernel_list[i] = kernel
+
+		return kernel_list
+
+
+	def _compute_kernel_list_imap_unordered(self, g1, g_list):
+		self.__check_graphs(g_list + [g1])
+		self.__add_dummy_labels(g_list + [g1])
+		if not self.__ds_infos['directed']:  # convert
+			g1 = g1.to_directed()
+			g_list = [G.to_directed() for G in g_list]
+
+		# compute kernel list.
+		kernel_list = [None] * len(g_list)
+
+		def init_worker(g1_toshare, g_list_toshare):
+			global G_g1, G_g_list
+			G_g1 = g1_toshare
+			G_g_list = g_list_toshare
+
+		# direct product graph method - exponential
+		if self.__compute_method == 'exp':
+			do_fun = self._wrapper_kernel_list_do_exp
+		# direct product graph method - geometric
+		elif self.__compute_method == 'geo':
+			do_fun = self._wrapper_kernel_list_do_geo
+
+		def func_assign(result, var_to_assign):
+			var_to_assign[result[0]] = result[1]
+		itr = range(len(g_list))
+		len_itr = len(g_list)
+		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
+			init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
+			n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
+
+		return kernel_list
+
+
+	def _wrapper_kernel_list_do_exp(self, itr):
+		return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight)
+
+
+	def _wrapper_kernel_list_do_geo(self, itr):
+		return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight)
+
+
+	def _compute_single_kernel_series(self, g1, g2):
+		self.__check_graphs([g1] + [g2])
+		self.__add_dummy_labels([g1] + [g2])
+		if not self.__ds_infos['directed']:  # convert
+			g1 = g1.to_directed()
+			g2 = g2.to_directed()
+
+		# direct product graph method - exponential
+		if self.__compute_method == 'exp':
+			kernel = self.__kernel_do_exp(g1, g2, self.__weight)
+		# direct product graph method - geometric
+		elif self.__compute_method == 'geo':
+			kernel = self.__kernel_do_geo(g1, g2, self.__weight)
+
+		return kernel
+
+
+	def __kernel_do_exp(self, g1, g2, beta):
+		"""Compute the common walk graph kernel between two graphs using the
+		exponential series.
+
+		Parameters
+		----------
+		g1, g2 : NetworkX graphs
+			Graphs between which the kernel is computed.
+		beta : float
+			Weight parameter of the exponential series.
+
+		Return
+		------
+		kernel : float
+			The common walk kernel between the two graphs.
+		"""
+		# get tensor product / direct product
+		gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
+		# return 0 if the direct product graph has fewer than 2 nodes.
+		if nx.number_of_nodes(gp) < 2:
+			return 0
+		A = nx.adjacency_matrix(gp).todense()
+
+		ew, ev = np.linalg.eig(A)
+#		# remove imaginary part if possible.
+#		# @todo: don't know if it is necessary.
+#		for i in range(len(ew)):
+#			if np.abs(ew[i].imag) < 1e-9:
+#				ew[i] = ew[i].real
+#		for i in range(ev.shape[0]):
+#			for j in range(ev.shape[1]):
+#				if np.abs(ev[i, j].imag) < 1e-9:
+#					ev[i, j] = ev[i, j].real
+
+		D = np.zeros((len(ew), len(ew)), dtype=complex)  # @todo: use complex?
+		for i in range(len(ew)):
+			D[i][i] = np.exp(beta * ew[i])
+
+		exp_D = ev * D * ev.T
+		kernel = exp_D.sum()
+		if (kernel.real == 0 and np.abs(kernel.imag) < 1e-9) or np.abs(kernel.imag / kernel.real) < 1e-9:
+			kernel = kernel.real
+
+		return kernel
+
+
+	def _wrapper_kernel_do_exp(self, itr):
+		i = itr[0]
+		j = itr[1]
+		return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight)
+
+
+	def __kernel_do_geo(self, g1, g2, gamma):
+		"""Compute the common walk graph kernel between two graphs using the
+		geometric series.
+
+		Parameters
+		----------
+		g1, g2 : NetworkX graphs
+			Graphs between which the kernel is computed.
+		gamma : float
+			Weight parameter of the geometric series.
+
+		Return
+		------
+		kernel : float
+			The common walk kernel between the two graphs.
+		"""
+		# get tensor product / direct product
+		gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
+		# return 0 if the direct product graph has fewer than 2 nodes.
+		if nx.number_of_nodes(gp) < 2:
+			return 0
+		A = nx.adjacency_matrix(gp).todense()
+		mat = np.identity(len(A)) - gamma * A
+		# try:
+		return mat.I.sum()
+		# except np.linalg.LinAlgError:
+		#	 return np.nan
+
+
+	def _wrapper_kernel_do_geo(self, itr):
+		i = itr[0]
+		j = itr[1]
+		return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight)
+
+
+	def __check_graphs(self, Gn):
+		for g in Gn:
+			if nx.number_of_nodes(g) == 1:
+				raise Exception('Graphs must contain more than 1 node to construct adjacency matrices.')
+
+
+	def __add_dummy_labels(self, Gn):
+		if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+			for i in range(len(Gn)):
+				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+			self.__node_labels = [SpecialLabel.DUMMY]
+		if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+			for i in range(len(Gn)):
+				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+			self.__edge_labels = [SpecialLabel.DUMMY]
\ No newline at end of file
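[Reviewer note] A minimal usage sketch of the CommonWalk class added above, not part of the patch. The two toy graphs and the 'atom'/'bond_type' attribute names are hypothetical; the constructor arguments mirror test_CommonWalk further down, and compute() is the entry point inherited from GraphKernel:

import networkx as nx
from gklearn.kernels import CommonWalk

g1 = nx.Graph()
g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
g1.add_edge(0, 1, bond_type='1')

g2 = nx.Graph()
g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'O'})])
g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])

# geometric variant; the weight must stay small enough for the series to converge.
graph_kernel = CommonWalk(node_labels=['atom'], edge_labels=['bond_type'],
                          ds_infos={'directed': False},
                          weight=0.01, compute_method='geo')
kernel, run_time = graph_kernel.compute(g1, g2, parallel=None, n_jobs=1, verbose=True)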
diff --git a/gklearn/kernels/graph_kernel.py b/gklearn/kernels/graph_kernel.py
index db4abf8..7c6afde 100644
--- a/gklearn/kernels/graph_kernel.py
+++ b/gklearn/kernels/graph_kernel.py
@@ -10,6 +10,7 @@ import networkx as nx
 import multiprocessing
 import time
 
+
 class GraphKernel(object):
 
 	def __init__(self):
diff --git a/gklearn/kernels/marginalized.py b/gklearn/kernels/marginalized.py
index 6ddec43..6910468 100644
--- a/gklearn/kernels/marginalized.py
+++ b/gklearn/kernels/marginalized.py
@@ -51,7 +51,7 @@ class Marginalized(GraphKernel):
 			else:
 				iterator = self._graphs
 			# @todo: this may not work.
-			self._graphs = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
+			self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
 
 		# compute Gram matrix.
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -108,13 +108,13 @@ class Marginalized(GraphKernel):
 		self.__add_dummy_labels(g_list + [g1])
 
 		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
+			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
 			if self._verbose >= 2:
 				iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
 			else:
 				iterator = g_list
 			# @todo: this may not work.
-			g_list = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
+			g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
 
 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
@@ -133,7 +133,7 @@ class Marginalized(GraphKernel):
 		self.__add_dummy_labels(g_list + [g1])
 
 		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
+			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
 
 		pool = Pool(self._n_jobs)
 		itr = range(0, len(g_list))
 		if len(g_list) < 100 * self._n_jobs:
@@ -177,8 +177,8 @@ class Marginalized(GraphKernel):
 	def _compute_single_kernel_series(self, g1, g2):
 		self.__add_dummy_labels([g1] + [g2])
 		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
-			g2 = untotterTransformation(g2, self.__node_label, self.__edge_label)
+			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
+			g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
 		kernel = self.__kernel_do(g1, g2)
 		return kernel
@@ -324,7 +324,7 @@ class Marginalized(GraphKernel):
 
 	def _wrapper_untotter(self, i):
-		return i, untotterTransformation(self._graphs[i], self.__node_label, self.__edge_label) # @todo: this may not work.
+		return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels) # @todo: this may not work.
 
 	def __add_dummy_labels(self, Gn):
diff --git a/gklearn/tests/test_graph_kernels.py b/gklearn/tests/test_graph_kernels.py
index 59efc88..c6fcfbe 100644
--- a/gklearn/tests/test_graph_kernels.py
+++ b/gklearn/tests/test_graph_kernels.py
@@ -52,94 +52,104 @@ def chooseDataset(ds_name):
 	return dataset
 
 
-# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
-# @pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
-# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
-# def test_commonwalkkernel(ds_name, weight, compute_method):
-# 	"""Test common walk kernel.
-# 	"""
-# 	from gklearn.kernels.commonWalkKernel import commonwalkkernel
+@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
+@pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
+@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_CommonWalk(ds_name, parallel, weight, compute_method):
+	"""Test common walk kernel.
+	"""
+	from gklearn.kernels import CommonWalk
+	import networkx as nx
+
+	dataset = chooseDataset(ds_name)
+	dataset.load_graphs([g for g in dataset.graphs if nx.number_of_nodes(g) > 1])
 
-# 	Gn, y = chooseDataset(ds_name)
+	try:
+		graph_kernel = CommonWalk(node_labels=dataset.node_labels,
+					edge_labels=dataset.edge_labels,
+					ds_infos=dataset.get_dataset_infos(keys=['directed']),
+					weight=weight,
+					compute_method=compute_method)
+		gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
+			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+		kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
+			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+		kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
+			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
 
-# 	try:
-# 		Kmatrix, run_time, idx = commonwalkkernel(Gn,
-# 							node_label='atom',
-# 							edge_label='bond_type',
-# 							weight=weight,
-# 							compute_method=compute_method,
-# #							parallel=parallel,
-# 							n_jobs=multiprocessing.cpu_count(),
-# 							verbose=True)
-# 	except Exception as exception:
-# 		assert False, exception
+	except Exception as exception:
+		assert False, exception
 
 
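[Reviewer note] On the weights in the parametrization of test_CommonWalk above (an assumed rationale, following Gärtner et al. [1]): the geometric variant sums gamma^n over walk counts, which converges only when gamma is below the reciprocal of the spectral radius of the direct product adjacency matrix, hence the small weight 0.01 for 'geo', while the exponential series converges for any beta, so 'exp' can use 1. A numerical sketch, not part of the patch:

import numpy as np

A = np.random.rand(6, 6)              # stand-in for a direct product adjacency matrix
rho = max(abs(np.linalg.eigvals(A)))  # spectral radius
gamma = 0.01
assert gamma < 1.0 / rho              # rho <= 6 for this A, so the bound holds
# mirrors __kernel_do_geo: sum of the entries of (I - gamma*A)^-1
k_geo = np.linalg.inv(np.identity(6) - gamma * A).sum()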
-# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
-# @pytest.mark.parametrize('remove_totters', [True, False])
-# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
-# def test_marginalizedkernel(ds_name, remove_totters):
-# 	"""Test marginalized kernel.
-# 	"""
-# 	from gklearn.kernels.marginalizedKernel import marginalizedkernel
+@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
+@pytest.mark.parametrize('remove_totters', [False]) #[True, False])
+@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_Marginalized(ds_name, parallel, remove_totters):
+	"""Test marginalized kernel.
+	"""
+	from gklearn.kernels import Marginalized
 
-# 	Gn, y = chooseDataset(ds_name)
+	dataset = chooseDataset(ds_name)
+
+	try:
+		graph_kernel = Marginalized(node_labels=dataset.node_labels,
+					edge_labels=dataset.edge_labels,
+					ds_infos=dataset.get_dataset_infos(keys=['directed']),
+					p_quit=0.5,
+					n_iteration=2,
+					remove_totters=remove_totters)
+		gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
+			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+		kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
+			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+		kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
+			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
 
-# 	try:
-# 		Kmatrix, run_time = marginalizedkernel(Gn,
-# 							node_label='atom',
-# 							edge_label='bond_type',
-# 							p_quit=0.5,
-# 							n_iteration=2,
-# 							remove_totters=remove_totters,
-# #							parallel=parallel,
-# 							n_jobs=multiprocessing.cpu_count(),
-# 							verbose=True)
-# 	except Exception as exception:
-# 		assert False, exception
+	except Exception as exception:
+		assert False, exception
 
 
 # @pytest.mark.parametrize(
-# 	'compute_method,ds_name,sub_kernel',
-# 	[
+# 		'compute_method,ds_name,sub_kernel',
+# 		[
 # #		('sylvester', 'Alkane', None),
 # #		('conjugate', 'Alkane', None),
 # #		('conjugate', 'AIDS', None),
 # #		('fp', 'Alkane', None),
 # #		('fp', 'AIDS', None),
-# 	('spectral', 'Alkane', 'exp'),
-# 	('spectral', 'Alkane', 'geo'),
-# 	]
+# 		('spectral', 'Alkane', 'exp'),
+# 		('spectral', 'Alkane', 'geo'),
+# 		]
 # )
 # #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
 # def test_randomwalkkernel(ds_name, compute_method, sub_kernel):
-# 	"""Test random walk kernel kernel.
+# 	"""
+# 	from gklearn.kernels.randomWalkKernel import randomwalkkernel
+# 	from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+# 	import functools
 
-# 	Gn, y = chooseDataset(ds_name)
+# 	Gn, y = chooseDataset(ds_name)
 
-# 	mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-# 	sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
-# 	try:
-# 		Kmatrix, run_time, idx = randomwalkkernel(Gn,
-# 							compute_method=compute_method,
-# 							weight=1e-3,
-# 							p=None,
-# 							q=None,
-# 							edge_weight=None,
-# 							node_kernels=sub_kernels,
-# 							edge_kernels=sub_kernels,
-# 							node_label='atom',
-# 							edge_label='bond_type',
-# 							sub_kernel=sub_kernel,
-# #							parallel=parallel,
-# 							n_jobs=multiprocessing.cpu_count(),
-# 							verbose=True)
-# 	except Exception as exception:
-# 		assert False, exception
+# 	mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+# 	sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
+# 	try:
+# 		Kmatrix, run_time, idx = randomwalkkernel(Gn,
+# 							compute_method=compute_method,
+# 							weight=1e-3,
+# 							p=None,
+# 							q=None,
+# 							edge_weight=None,
+# 							node_kernels=sub_kernels,
+# 							edge_kernels=sub_kernels,
+# 							node_label='atom',
+# 							edge_label='bond_type',
+# 							sub_kernel=sub_kernel,
+# #							parallel=parallel,
+# 							n_jobs=multiprocessing.cpu_count(),
+# 							verbose=True)
+# 	except Exception as exception:
+# 		assert False, exception
 
 
 @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@@ -157,9 +167,9 @@ def test_ShortestPath(ds_name, parallel):
 	sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
 	try:
 		graph_kernel = ShortestPath(node_labels=dataset.node_labels,
-							node_attrs=dataset.node_attrs,
-							ds_infos=dataset.get_dataset_infos(keys=['directed']),
-							node_kernels=sub_kernels)
+					node_attrs=dataset.node_attrs,
+					ds_infos=dataset.get_dataset_infos(keys=['directed']),
+					node_kernels=sub_kernels)
 		gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
 			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
 		kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -187,12 +197,12 @@ def test_StructuralSP(ds_name, parallel):
 	sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
 	try:
 		graph_kernel = StructuralSP(node_labels=dataset.node_labels,
-							edge_labels=dataset.edge_labels,
-							node_attrs=dataset.node_attrs,
-							edge_attrs=dataset.edge_attrs,
-							ds_infos=dataset.get_dataset_infos(keys=['directed']),
-							node_kernels=sub_kernels,
-							edge_kernels=sub_kernels)
+					edge_labels=dataset.edge_labels,
+					node_attrs=dataset.node_attrs,
+					edge_attrs=dataset.edge_attrs,
+					ds_infos=dataset.get_dataset_infos(keys=['directed']),
+					node_kernels=sub_kernels,
+					edge_kernels=sub_kernels)
 		gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
 			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
 		kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -218,9 +228,9 @@ def test_PathUpToH(ds_name, parallel, k_func, compute_method):
 
 	try:
 		graph_kernel = PathUpToH(node_labels=dataset.node_labels,
-					 edge_labels=dataset.edge_labels,
-					 ds_infos=dataset.get_dataset_infos(keys=['directed']),
-					 depth=2, k_func=k_func, compute_method=compute_method)
+					edge_labels=dataset.edge_labels,
+					ds_infos=dataset.get_dataset_infos(keys=['directed']),
+					depth=2, k_func=k_func, compute_method=compute_method)
 		gram_matrix, run_time = graph_kernel.compute(dataset.graphs, parallel=parallel,
 			n_jobs=multiprocessing.cpu_count(), verbose=True)
 		kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -245,9 +255,9 @@ def test_Treelet(ds_name, parallel):
 
 	pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
 	try:
 		graph_kernel = Treelet(node_labels=dataset.node_labels,
-					 edge_labels=dataset.edge_labels,
-					 ds_infos=dataset.get_dataset_infos(keys=['directed']),
-					 sub_kernel=pkernel)
+					edge_labels=dataset.edge_labels,
+					ds_infos=dataset.get_dataset_infos(keys=['directed']),
+					sub_kernel=pkernel)
 		gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
 			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
 		kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -271,9 +281,9 @@ def test_WLSubtree(ds_name, parallel):
 
 	try:
 		graph_kernel = WLSubtree(node_labels=dataset.node_labels,
-					 edge_labels=dataset.edge_labels,
-					 ds_infos=dataset.get_dataset_infos(keys=['directed']),
-					 height=2)
+					edge_labels=dataset.edge_labels,
+					ds_infos=dataset.get_dataset_infos(keys=['directed']),
+					height=2)
 		gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
 			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
 		kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
diff --git a/gklearn/utils/utils.py b/gklearn/utils/utils.py
index 19e8db4..c32169d 100644
--- a/gklearn/utils/utils.py
+++ b/gklearn/utils/utils.py
@@ -222,6 +222,70 @@ def direct_product(G1, G2, node_label, edge_label):
 	return gt
 
 
+def direct_product_graph(G1, G2, node_labels, edge_labels):
+	"""Return the direct/tensor product of directed graphs G1 and G2.
+
+	Parameters
+	----------
+	G1, G2 : NetworkX graph
+		The original graphs.
+	node_labels : list
+		A list of node attributes used as labels.
+	edge_labels : list
+		A list of edge attributes used as labels.
+
+	Return
+	------
+	gt : NetworkX graph
+		The direct product graph of G1 and G2.
+
+	Notes
+	-----
+	This method differs from networkx.tensor_product in that it only adds to the direct product graph those nodes and edges of G1 and G2 that carry the same labels.
+
+	References
+	----------
+	.. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
+	"""
+	# arrange all graphs in a list
+	from itertools import product
+	# G = G.to_directed()
+	gt = nx.DiGraph()
+	# add nodes
+	for u, v in product(G1, G2):
+		label1 = tuple(G1.nodes[u][nl] for nl in node_labels)
+		label2 = tuple(G2.nodes[v][nl] for nl in node_labels)
+		if label1 == label2:
+			gt.add_node((u, v), node_label=label1)
+
+	# add edges; faster for sparse graphs (not so many edges), which is the most common case so far.
+	for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
+		if (u1, u2) in gt and (v1, v2) in gt:
+			label1 = tuple(G1.edges[u1, v1][el] for el in edge_labels)
+			label2 = tuple(G2.edges[u2, v2][el] for el in edge_labels)
+			if label1 == label2:
+				gt.add_edge((u1, u2), (v1, v2), edge_label=label1)
+
+
+	# # add edges; would be faster for dense graphs (a lot of edges; a complete graph is the extreme case).
+	# for u, v in product(gt, gt):
+	# 	if (u[0], v[0]) in G1.edges and (
+	# 			u[1], v[1]) in G2.edges and G1.edges[u[0],
+	# 			v[0]][edge_label] == G2.edges[u[1],
+	# 			v[1]][edge_label]:
+	# 		gt.add_edge((u[0], u[1]), (v[0], v[1]))
+	# 		gt.edges[(u[0], u[1]), (v[0], v[1])].update({
+	# 			edge_label:
+	# 			G1.edges[u[0], v[0]][edge_label]
+	# 		})
+
+	# relabel nodes using consecutive integers for convenience of kernel calculation.
+	# gt = nx.convert_node_labels_to_integers(
+	# 	gt, first_label=0, label_attribute='label_original')
+	return gt
+
+
 def graph_deepcopy(G):
 	"""Deep copy a graph, including deep copy of all nodes, edges and attributes of the graph, nodes and edges.
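[Reviewer note] A small trace of the new direct_product_graph helper, not part of the patch; the toy graphs and the 'atom'/'bond_type' label names are hypothetical. Only node pairs whose label tuples match become product nodes, and only edge pairs whose labels match become product edges:

import networkx as nx
from gklearn.utils.utils import direct_product_graph

g1 = nx.DiGraph()
g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
g1.add_edge(0, 1, bond_type='1')

g2 = nx.DiGraph()
g2.add_nodes_from([('a', {'atom': 'C'}), ('b', {'atom': 'O'})])
g2.add_edge('a', 'b', bond_type='1')

gt = direct_product_graph(g1, g2, ['atom'], ['bond_type'])
print(list(gt.nodes))  # [(0, 'a'), (1, 'b')] -- the label-compatible node pairs
print(list(gt.edges))  # [((0, 'a'), (1, 'b'))] -- the single compatible edge pair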