
Add CommonWalk class.

v0.2.x
jajupmochi 4 years ago
commit 6eb6a4a3ad
6 changed files with 452 additions and 94 deletions
  1. +1 -0 gklearn/kernels/__init__.py
  2. +282 -0 gklearn/kernels/common_walk.py
  3. +1 -0 gklearn/kernels/graph_kernel.py
  4. +7 -7 gklearn/kernels/marginalized.py
  5. +97 -87 gklearn/tests/test_graph_kernels.py
  6. +64 -0 gklearn/utils/utils.py
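
A quick usage sketch (editor's note, not part of the diff): the new class follows the same pattern as the other kernels in gklearn.kernels, as exercised by the updated tests below. It assumes a loaded gklearn Dataset object named `dataset`; the hyperparameter values are illustrative.

import multiprocessing
from gklearn.kernels import CommonWalk

# 'geo' sums the geometric series over common walks, 'exp' the exponential
# series; `weight` is gamma for 'geo' and beta for 'exp' (see common_walk.py below).
graph_kernel = CommonWalk(node_labels=dataset.node_labels,
                          edge_labels=dataset.edge_labels,
                          ds_infos=dataset.get_dataset_infos(keys=['directed']),
                          weight=0.01,
                          compute_method='geo')

# Gram matrix over all graphs; every graph must have more than one node.
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
    parallel='imap_unordered', n_jobs=multiprocessing.cpu_count(), verbose=True)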

+ 1 - 0  gklearn/kernels/__init__.py

@@ -8,6 +8,7 @@ __author__ = "Linlin Jia"
__date__ = "November 2018"

from gklearn.kernels.graph_kernel import GraphKernel
from gklearn.kernels.common_walk import CommonWalk
from gklearn.kernels.marginalized import Marginalized
from gklearn.kernels.shortest_path import ShortestPath
from gklearn.kernels.structural_sp import StructuralSP


+ 282 - 0  gklearn/kernels/common_walk.py

@@ -0,0 +1,282 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 18 11:21:31 2020

@author: ljia

@references:

[1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels:
Hardness results and efficient alternatives. Learning Theory and Kernel
Machines, pages 129–143, 2003.
"""

import sys
from tqdm import tqdm
import numpy as np
import networkx as nx
from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import direct_product_graph
from gklearn.kernels import GraphKernel


class CommonWalk(GraphKernel):

    def __init__(self, **kwargs):
        GraphKernel.__init__(self)
        self.__node_labels = kwargs.get('node_labels', [])
        self.__edge_labels = kwargs.get('edge_labels', [])
        self.__weight = kwargs.get('weight', 1)
        self.__compute_method = kwargs.get('compute_method', None)
        self.__ds_infos = kwargs.get('ds_infos', {})
        if self.__compute_method is not None:  # guard the default None against .lower().
            self.__compute_method = self.__compute_method.lower()


    def _compute_gm_series(self):
        self.__check_graphs(self._graphs)
        self.__add_dummy_labels(self._graphs)
        if not self.__ds_infos['directed']:  # convert undirected graphs to directed.
            self._graphs = [G.to_directed() for G in self._graphs]

        # compute Gram matrix.
        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

        from itertools import combinations_with_replacement
        itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
        if self._verbose >= 2:
            iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
        else:
            iterator = itr

        # direct product graph method - exponential
        if self.__compute_method == 'exp':
            for i, j in iterator:
                kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight)
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel
        # direct product graph method - geometric
        elif self.__compute_method == 'geo':
            for i, j in iterator:
                kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight)
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel

        return gram_matrix
    def _compute_gm_imap_unordered(self):
        self.__check_graphs(self._graphs)
        self.__add_dummy_labels(self._graphs)
        if not self.__ds_infos['directed']:  # convert
            self._graphs = [G.to_directed() for G in self._graphs]

        # compute Gram matrix.
        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

        def init_worker(gn_toshare):
            global G_gn
            G_gn = gn_toshare

        # direct product graph method - exponential
        if self.__compute_method == 'exp':
            do_fun = self._wrapper_kernel_do_exp
        # direct product graph method - geometric
        elif self.__compute_method == 'geo':
            do_fun = self._wrapper_kernel_do_geo

        parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
                    glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)

        return gram_matrix
    def _compute_kernel_list_series(self, g1, g_list):
        self.__check_graphs(g_list + [g1])
        self.__add_dummy_labels(g_list + [g1])
        if not self.__ds_infos['directed']:  # convert
            g1 = g1.to_directed()
            g_list = [G.to_directed() for G in g_list]

        # compute kernel list.
        kernel_list = [None] * len(g_list)
        if self._verbose >= 2:
            iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
        else:
            iterator = range(len(g_list))

        # direct product graph method - exponential
        if self.__compute_method == 'exp':
            for i in iterator:
                kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight)
                kernel_list[i] = kernel
        # direct product graph method - geometric
        elif self.__compute_method == 'geo':
            for i in iterator:
                kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight)
                kernel_list[i] = kernel

        return kernel_list
    def _compute_kernel_list_imap_unordered(self, g1, g_list):
        self.__check_graphs(g_list + [g1])
        self.__add_dummy_labels(g_list + [g1])
        if not self.__ds_infos['directed']:  # convert
            g1 = g1.to_directed()
            g_list = [G.to_directed() for G in g_list]

        # compute kernel list.
        kernel_list = [None] * len(g_list)

        def init_worker(g1_toshare, g_list_toshare):
            global G_g1, G_g_list
            G_g1 = g1_toshare
            G_g_list = g_list_toshare

        # direct product graph method - exponential
        if self.__compute_method == 'exp':
            do_fun = self._wrapper_kernel_list_do_exp
        # direct product graph method - geometric
        elif self.__compute_method == 'geo':
            do_fun = self._wrapper_kernel_list_do_geo

        def func_assign(result, var_to_assign):
            var_to_assign[result[0]] = result[1]

        itr = range(len(g_list))
        len_itr = len(g_list)
        parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
                    init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
                    n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)

        return kernel_list


    def _wrapper_kernel_list_do_exp(self, itr):
        return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight)


    def _wrapper_kernel_list_do_geo(self, itr):
        return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight)
    def _compute_single_kernel_series(self, g1, g2):
        self.__check_graphs([g1] + [g2])
        self.__add_dummy_labels([g1] + [g2])
        if not self.__ds_infos['directed']:  # convert
            g1 = g1.to_directed()
            g2 = g2.to_directed()

        # direct product graph method - exponential
        if self.__compute_method == 'exp':
            kernel = self.__kernel_do_exp(g1, g2, self.__weight)
        # direct product graph method - geometric
        elif self.__compute_method == 'geo':
            kernel = self.__kernel_do_geo(g1, g2, self.__weight)

        return kernel
    def __kernel_do_exp(self, g1, g2, beta):
        """Compute the common walk kernel between two graphs using the
        exponential series.

        Parameters
        ----------
        g1, g2 : NetworkX graphs
            Graphs between which the kernel is computed.
        beta : float
            The weight parameter of the exponential series.

        Returns
        -------
        kernel : float
            The common walk kernel between the two graphs.
        """
        # get the tensor product / direct product graph.
        gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
        # return 0 if the direct product graph has fewer than 2 nodes.
        if nx.number_of_nodes(gp) < 2:
            return 0
        A = nx.adjacency_matrix(gp).todense()
        ew, ev = np.linalg.eig(A)

        # # remove imaginary part if possible.
        # # @todo: don't know if it is necessary.
        # for i in range(len(ew)):
        #     if np.abs(ew[i].imag) < 1e-9:
        #         ew[i] = ew[i].real
        # for i in range(ev.shape[0]):
        #     for j in range(ev.shape[1]):
        #         if np.abs(ev[i, j].imag) < 1e-9:
        #             ev[i, j] = ev[i, j].real

        D = np.zeros((len(ew), len(ew)), dtype=complex)  # @todo: use complex?
        for i in range(len(ew)):
            D[i][i] = np.exp(beta * ew[i])

        exp_D = ev * D * ev.T
        kernel = exp_D.sum()
        if (kernel.real == 0 and np.abs(kernel.imag) < 1e-9) or np.abs(kernel.imag / kernel.real) < 1e-9:
            kernel = kernel.real

        return kernel


    def _wrapper_kernel_do_exp(self, itr):
        i = itr[0]
        j = itr[1]
        return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight)
    def __kernel_do_geo(self, g1, g2, gamma):
        """Compute the common walk kernel between two graphs using the
        geometric series.

        Parameters
        ----------
        g1, g2 : NetworkX graphs
            Graphs between which the kernel is computed.
        gamma : float
            The weight parameter of the geometric series.

        Returns
        -------
        kernel : float
            The common walk kernel between the two graphs.
        """
        # get the tensor product / direct product graph.
        gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
        # return 0 if the direct product graph has fewer than 2 nodes.
        if nx.number_of_nodes(gp) < 2:
            return 0
        A = nx.adjacency_matrix(gp).todense()
        mat = np.identity(len(A)) - gamma * A
        # try:
        return mat.I.sum()
        # except np.linalg.LinAlgError:
        #     return np.nan


    def _wrapper_kernel_do_geo(self, itr):
        i = itr[0]
        j = itr[1]
        return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight)
    def __check_graphs(self, Gn):
        for g in Gn:
            if nx.number_of_nodes(g) == 1:
                raise Exception('Graphs must contain more than 1 node to construct adjacency matrices.')


    def __add_dummy_labels(self, Gn):
        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
            for i in range(len(Gn)):
                nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
            self.__node_labels = [SpecialLabel.DUMMY]
        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
            for i in range(len(Gn)):
                nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
            self.__edge_labels = [SpecialLabel.DUMMY]
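
Both variants reduce to entrywise sums over functions of the direct product graph's adjacency matrix A: the exponential series sums (beta^k / k!) A^k = exp(beta * A), and the geometric series sums gamma^k A^k = (I - gamma * A)^-1, which converges only when gamma is smaller than the reciprocal of the largest eigenvalue of A. A minimal numerical sketch (editor's illustration on a toy matrix, not part of the commit):

import numpy as np

# adjacency matrix of a toy direct product graph (symmetric here, as for
# undirected inputs converted with to_directed()).
A = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])

beta = 1.0    # weight of the exponential series
gamma = 0.1   # weight of the geometric series; needs gamma < 1 / lambda_max(A)

# exponential series: k = sum_ij [exp(beta * A)]_ij via eigendecomposition,
# mirroring __kernel_do_exp above (eigh suffices for a symmetric A).
ew, ev = np.linalg.eigh(A)
k_exp = (ev @ np.diag(np.exp(beta * ew)) @ ev.T).sum()

# geometric series: k = sum_ij [(I - gamma * A)^-1]_ij, mirroring __kernel_do_geo.
k_geo = np.linalg.inv(np.identity(len(A)) - gamma * A).sum()

print(k_exp, k_geo)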

+ 1 - 0  gklearn/kernels/graph_kernel.py

@@ -10,6 +10,7 @@ import networkx as nx
import multiprocessing
import time


class GraphKernel(object):

    def __init__(self):

+ 7 - 7  gklearn/kernels/marginalized.py

@@ -51,7 +51,7 @@ class Marginalized(GraphKernel):
            else:
                iterator = self._graphs
            # @todo: this may not work.
-           self._graphs = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
+           self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]

        # compute Gram matrix.
        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

@@ -108,13 +108,13 @@
        self.__add_dummy_labels(g_list + [g1])
        if self.__remove_totters:
-           g1 = untotterTransformation(g1, self.__node_label, self.__edge_label)  # @todo: this may not work.
+           g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
            if self._verbose >= 2:
                iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
            else:
                iterator = g_list
            # @todo: this may not work.
-           g_list = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
+           g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]

        # compute kernel list.
        kernel_list = [None] * len(g_list)

@@ -133,7 +133,7 @@
        self.__add_dummy_labels(g_list + [g1])
        if self.__remove_totters:
-           g1 = untotterTransformation(g1, self.__node_label, self.__edge_label)  # @todo: this may not work.
+           g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
        pool = Pool(self._n_jobs)
        itr = range(0, len(g_list))
        if len(g_list) < 100 * self._n_jobs:

@@ -177,8 +177,8 @@
    def _compute_single_kernel_series(self, g1, g2):
        self.__add_dummy_labels([g1] + [g2])
        if self.__remove_totters:
-           g1 = untotterTransformation(g1, self.__node_label, self.__edge_label)  # @todo: this may not work.
-           g2 = untotterTransformation(g2, self.__node_label, self.__edge_label)
+           g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+           g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
        kernel = self.__kernel_do(g1, g2)
        return kernel

@@ -324,7 +324,7 @@
    def _wrapper_untotter(self, i):
-       return i, untotterTransformation(self._graphs[i], self.__node_label, self.__edge_label)  # @todo: this may not work.
+       return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels)  # @todo: this may not work.

    def __add_dummy_labels(self, Gn):

+ 97 - 87  gklearn/tests/test_graph_kernels.py

@@ -52,94 +52,104 @@ def chooseDataset(ds_name):
    return dataset


-# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
-# @pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
-# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
-# def test_commonwalkkernel(ds_name, weight, compute_method):
-#     """Test common walk kernel.
-#     """
-#     from gklearn.kernels.commonWalkKernel import commonwalkkernel
+@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
+@pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
+@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_CommonWalk(ds_name, parallel, weight, compute_method):
+    """Test common walk kernel.
+    """
+    from gklearn.kernels import CommonWalk
+    import networkx as nx
+    dataset = chooseDataset(ds_name)
+    dataset.load_graphs([g for g in dataset.graphs if nx.number_of_nodes(g) > 1])
+    # Gn, y = chooseDataset(ds_name)
+    try:
+        graph_kernel = CommonWalk(node_labels=dataset.node_labels,
+                                  edge_labels=dataset.edge_labels,
+                                  ds_infos=dataset.get_dataset_infos(keys=['directed']),
+                                  weight=weight,
+                                  compute_method=compute_method)
+        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
+            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
+            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
+            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)

-#     try:
-#         Kmatrix, run_time, idx = commonwalkkernel(Gn,
-#             node_label='atom',
-#             edge_label='bond_type',
-#             weight=weight,
-#             compute_method=compute_method,
-#             # parallel=parallel,
-#             n_jobs=multiprocessing.cpu_count(),
-#             verbose=True)
-#     except Exception as exception:
-#         assert False, exception
+    except Exception as exception:
+        assert False, exception
-# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
-# @pytest.mark.parametrize('remove_totters', [True, False])
-# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
-# def test_marginalizedkernel(ds_name, remove_totters):
-#     """Test marginalized kernel.
-#     """
-#     from gklearn.kernels.marginalizedKernel import marginalizedkernel
+@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
+@pytest.mark.parametrize('remove_totters', [False])  # [True, False])
+@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_Marginalized(ds_name, parallel, remove_totters):
+    """Test marginalized kernel.
+    """
+    from gklearn.kernels import Marginalized
+    # Gn, y = chooseDataset(ds_name)
+    dataset = chooseDataset(ds_name)
+    try:
+        graph_kernel = Marginalized(node_labels=dataset.node_labels,
+                                    edge_labels=dataset.edge_labels,
+                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
+                                    p_quit=0.5,
+                                    n_iteration=2,
+                                    remove_totters=remove_totters)
+        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
+            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
+            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
+            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)

-#     try:
-#         Kmatrix, run_time = marginalizedkernel(Gn,
-#             node_label='atom',
-#             edge_label='bond_type',
-#             p_quit=0.5,
-#             n_iteration=2,
-#             remove_totters=remove_totters,
-#             # parallel=parallel,
-#             n_jobs=multiprocessing.cpu_count(),
-#             verbose=True)
-#     except Exception as exception:
-#         assert False, exception
+    except Exception as exception:
+        assert False, exception
# @pytest.mark.parametrize(
#     'compute_method,ds_name,sub_kernel',
#     [
#         # ('sylvester', 'Alkane', None),
#         # ('conjugate', 'Alkane', None),
#         # ('conjugate', 'AIDS', None),
#         # ('fp', 'Alkane', None),
#         # ('fp', 'AIDS', None),
#         ('spectral', 'Alkane', 'exp'),
#         ('spectral', 'Alkane', 'geo'),
#     ]
# )
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_randomwalkkernel(ds_name, compute_method, sub_kernel):
#     """Test random walk kernel.
#     """
#     from gklearn.kernels.randomWalkKernel import randomwalkkernel
#     from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
#     import functools
#
#     Gn, y = chooseDataset(ds_name)
#
#     mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
#     sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
#     try:
#         Kmatrix, run_time, idx = randomwalkkernel(Gn,
#             compute_method=compute_method,
#             weight=1e-3,
#             p=None,
#             q=None,
#             edge_weight=None,
#             node_kernels=sub_kernels,
#             edge_kernels=sub_kernels,
#             node_label='atom',
#             edge_label='bond_type',
#             sub_kernel=sub_kernel,
#             # parallel=parallel,
#             n_jobs=multiprocessing.cpu_count(),
#             verbose=True)
#     except Exception as exception:
#         assert False, exception

@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@@ -157,9 +167,9 @@ def test_ShortestPath(ds_name, parallel):
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
    try:
        graph_kernel = ShortestPath(node_labels=dataset.node_labels,
                                    node_attrs=dataset.node_attrs,
                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                    node_kernels=sub_kernels)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -187,12 +197,12 @@ def test_StructuralSP(ds_name, parallel):
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
    try:
        graph_kernel = StructuralSP(node_labels=dataset.node_labels,
                                    edge_labels=dataset.edge_labels,
                                    node_attrs=dataset.node_attrs,
                                    edge_attrs=dataset.edge_attrs,
                                    ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                    node_kernels=sub_kernels,
                                    edge_kernels=sub_kernels)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -218,9 +228,9 @@ def test_PathUpToH(ds_name, parallel, k_func, compute_method):
    try:
        graph_kernel = PathUpToH(node_labels=dataset.node_labels,
                                 edge_labels=dataset.edge_labels,
                                 ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                 depth=2, k_func=k_func, compute_method=compute_method)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -245,9 +255,9 @@ def test_Treelet(ds_name, parallel):
    pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
    try:
        graph_kernel = Treelet(node_labels=dataset.node_labels,
                               edge_labels=dataset.edge_labels,
                               ds_infos=dataset.get_dataset_infos(keys=['directed']),
                               sub_kernel=pkernel)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -271,9 +281,9 @@ def test_WLSubtree(ds_name, parallel):

    try:
        graph_kernel = WLSubtree(node_labels=dataset.node_labels,
                                 edge_labels=dataset.edge_labels,
                                 ds_infos=dataset.get_dataset_infos(keys=['directed']),
                                 height=2)
        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],

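The new test can be run in isolation; one way to do so (editor's note, assuming pytest and the test datasets are available) is to select it by name from Python:

import pytest

# run only the common walk tests from this file; '-k' filters by test name.
pytest.main(['-v', '-k', 'CommonWalk', 'gklearn/tests/test_graph_kernels.py'])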

+ 64 - 0  gklearn/utils/utils.py

@@ -222,6 +222,70 @@ def direct_product(G1, G2, node_label, edge_label):
    return gt


def direct_product_graph(G1, G2, node_labels, edge_labels):
    """Return the direct/tensor product of directed graphs G1 and G2.

    Parameters
    ----------
    G1, G2 : NetworkX graph
        The original graphs.
    node_labels : list
        A list of node attributes used as labels.
    edge_labels : list
        A list of edge attributes used as labels.

    Returns
    -------
    gt : NetworkX graph
        The direct product graph of G1 and G2.

    Notes
    -----
    This method differs from networkx.tensor_product in that it only adds
    to the direct product graph those nodes and edges whose labels agree
    between G1 and G2.

    References
    ----------
    .. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129-143, 2003.
    """
    # arrange all graphs in a list
    from itertools import product
    # G = G.to_directed()
    gt = nx.DiGraph()
    # add nodes
    for u, v in product(G1, G2):
        label1 = tuple(G1.nodes[u][nl] for nl in node_labels)
        label2 = tuple(G2.nodes[v][nl] for nl in node_labels)
        if label1 == label2:
            gt.add_node((u, v), node_label=label1)

    # add edges; faster for sparse graphs (not too many edges), which is the most common case for now.
    for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
        if (u1, u2) in gt and (v1, v2) in gt:
            label1 = tuple(G1.edges[u1, v1][el] for el in edge_labels)
            label2 = tuple(G2.edges[u2, v2][el] for el in edge_labels)
            if label1 == label2:
                gt.add_edge((u1, u2), (v1, v2), edge_label=label1)


    # # add edges; faster for dense graphs (a lot of edges; a complete graph is the extreme case).
    # for u, v in product(gt, gt):
    #     if (u[0], v[0]) in G1.edges and (
    #             u[1], v[1]) in G2.edges and G1.edges[
    #             u[0], v[0]][edge_label] == G2.edges[u[1], v[1]][edge_label]:
    #         gt.add_edge((u[0], u[1]), (v[0], v[1]))
    #         gt.edges[(u[0], u[1]), (v[0], v[1])].update({
    #             edge_label: G1.edges[u[0], v[0]][edge_label]})

    # relabel nodes using consecutive integers for convenience of kernel calculation.
    # gt = nx.convert_node_labels_to_integers(
    #     gt, first_label=0, label_attribute='label_original')
    return gt
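
To make the label-matching behavior concrete, a small example (editor's illustration, not part of the commit; the 'atom' and 'bond' attribute names are made up):

import networkx as nx
from gklearn.utils.utils import direct_product_graph

G1 = nx.DiGraph()
G1.add_node(0, atom='C')
G1.add_node(1, atom='O')
G1.add_edge(0, 1, bond='single')

G2 = nx.DiGraph()
G2.add_node('a', atom='C')
G2.add_node('b', atom='O')
G2.add_edge('a', 'b', bond='single')

gp = direct_product_graph(G1, G2, ['atom'], ['bond'])
print(list(gp.nodes))  # [(0, 'a'), (1, 'b')]: only label-matching node pairs survive
print(list(gp.edges))  # [((0, 'a'), (1, 'b'))]: edge kept since labels agree in both factors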


def graph_deepcopy(G):
    """Deep copy a graph, including deep copy of all nodes, edges and
    attributes of the graph, nodes and edges.

