
Merge pull request #19 from jajupmochi/v0.2

V0.2
tags/v0.2.0
linlin (GitHub), 5 years ago, commit d8708ebb00
16 changed files with 2865 additions and 38 deletions
  1. gklearn/examples/__init__.py (+0 -0)
  2. gklearn/examples/compute_distance_in_kernel_space.py (+73 -0)
  3. gklearn/examples/compute_graph_kernel.py (+87 -0)
  4. gklearn/examples/median_preimege_generator.py (+115 -0)
  5. gklearn/kernels/__init__.py (+2 -1)
  6. gklearn/kernels/marginalized.py (+338 -0)
  7. gklearn/kernels/treelet.py (+1 -1)
  8. gklearn/preimage/__init__.py (+2 -0)
  9. gklearn/preimage/experiments/xp_random_preimage.py (+1192 -0)
  10. gklearn/preimage/experiments/xp_random_preimage_generation.py (+262 -0)
  11. gklearn/preimage/experiments/xp_simple_preimage.py (+176 -0)
  12. gklearn/preimage/generate_random_preimages_by_class.py (+188 -0)
  13. gklearn/preimage/median_preimage_generator.py (+28 -30)
  14. gklearn/preimage/random_preimage_generator.py (+389 -0)
  15. gklearn/preimage/utils.py (+5 -5)
  16. gklearn/utils/utils.py (+7 -1)

gklearn/examples/__init__.py (+0 -0)


gklearn/examples/compute_distance_in_kernel_space.py (+73 -0)

@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
"""compute_distance_in_kernel_space.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/17tZP6IrineQmzo9sRtfZOnHpHx6HnlMA

**This script demonstrates how to compute distance in kernel space between the image of a graph and the mean of images of a group of graphs.**
---

**0. Install `graphkit-learn`.**
"""

"""**1. Get dataset.**"""

from gklearn.utils import Dataset

# Predefined dataset name, use dataset "MUTAG".
ds_name = 'MUTAG'

# Initialize a Dataset.
dataset = Dataset()
# Load predefined dataset "MUTAG".
dataset.load_predefined_dataset(ds_name)
len(dataset.graphs)

"""**2. Compute graph kernel.**"""

from gklearn.kernels import PathUpToH
import multiprocessing

# Initialize parameters for graph kernel computation.
kernel_options = {'depth': 3,
'k_func': 'MinMax',
'compute_method': 'trie'
}

# Initialize graph kernel.
graph_kernel = PathUpToH(node_labels=dataset.node_labels, # list of node label names.
edge_labels=dataset.edge_labels, # list of edge label names.
ds_infos=dataset.get_dataset_infos(keys=['directed']), # dataset information required for computation.
**kernel_options, # options for computation.
)

# Compute Gram matrix.
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel='imap_unordered', # or None.
n_jobs=multiprocessing.cpu_count(), # number of parallel jobs.
normalize=True, # whether to return normalized Gram matrix.
verbose=2 # whether to print out results.
)

"""**3. Compute distance in kernel space.**

Given a dataset $\mathcal{G}_N$, compute the distance in kernel space between the image of $G_1 \in \mathcal{G}_N$ and the mean of images of $\mathcal{G}_k \subset \mathcal{G}_N$.
"""

from gklearn.preimage.utils import compute_k_dis

# Index of $G_1$.
idx_1 = 10
# Indices of graphs in $\mathcal{G}_k$.
idx_graphs = range(0, 10)

# Compute the distance in kernel space.
dis_k = compute_k_dis(idx_1,
idx_graphs,
[1 / len(idx_graphs)] * len(idx_graphs), # weights for images of graphs in $\mathcal{G}_k$; all equal when computing the mean.
gram_matrix, # Gram matrix of all graphs.
withterm3=False
)
print(dis_k)

gklearn/examples/compute_graph_kernel.py (+87 -0)

@@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-
"""compute_graph_kernel.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/17Q2QCl9CAtDweGF8LiWnWoN2laeJqT0u

**This script demonstrates how to compute a graph kernel.**
---

**0. Install `graphkit-learn`.**
"""

"""**1. Get dataset.**"""

from gklearn.utils import Dataset

# Predefined dataset name, use dataset "MUTAG".
ds_name = 'MUTAG'

# Initialize a Dataset.
dataset = Dataset()
# Load predefined dataset "MUTAG".
dataset.load_predefined_dataset(ds_name)
len(dataset.graphs)

"""**2. Compute graph kernel.**"""

from gklearn.kernels import PathUpToH

# Initialize parameters for graph kernel computation.
kernel_options = {'depth': 3,
'k_func': 'MinMax',
'compute_method': 'trie'
}

# Initialize graph kernel.
graph_kernel = PathUpToH(node_labels=dataset.node_labels, # list of node label names.
edge_labels=dataset.edge_labels, # list of edge label names.
ds_infos=dataset.get_dataset_infos(keys=['directed']), # dataset information required for computation.
**kernel_options, # options for computation.
)

print('done.')

import multiprocessing
import matplotlib.pyplot as plt

# Compute Gram matrix.
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel='imap_unordered', # or None.
n_jobs=multiprocessing.cpu_count(), # number of parallel jobs.
normalize=True, # whether to return normalized Gram matrix.
verbose=2 # whether to print out results.
)
# Print results.
print()
print(gram_matrix)
print(run_time)
plt.imshow(gram_matrix)
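
# A hedged usage sketch, not part of the original notebook: a normalized Gram matrix
# can be fed to scikit-learn as a precomputed kernel, e.g. for SVM classification.
# It is assumed here that `dataset.targets` holds the class labels in the same order
# as `dataset.graphs`.
from sklearn.svm import SVC

clf = SVC(kernel='precomputed')
clf.fit(gram_matrix, dataset.targets)
print('Training accuracy (illustration only):', clf.score(gram_matrix, dataset.targets))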

import multiprocessing

# Compute graph kernels between a graph and a list of graphs.
kernel_list, run_time = graph_kernel.compute(dataset.graphs, # a list of graphs.
dataset.graphs[0], # a single graph.
parallel='imap_unordered', # or None.
n_jobs=multiprocessing.cpu_count(), # number of parallel jobs.
verbose=2 # whether to print out results.
)
# Print results.
print()
print(kernel_list)
print(run_time)

import multiprocessing

# Compute a graph kernel between two graphs.
kernel, run_time = graph_kernel.compute(dataset.graphs[0], # a single graph.
dataset.graphs[1], # another single graph.
verbose=2 # whether to print out results.
)
# Print results.
print()
print(kernel)
print(run_time)

gklearn/examples/median_preimege_generator.py (+115 -0)

@@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
"""example_median_preimege_generator.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1PIDvHOcmiLEQ5Np3bgBDdu0kLOquOMQK

**This script demonstrates how to generate a graph preimage using Boria's method.**
---
"""

"""**1. Get dataset.**"""

from gklearn.utils import Dataset, split_dataset_by_target

# Predefined dataset name, use dataset "MAO".
ds_name = 'MAO'
# The node/edge labels that will not be used in the computation.
irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}

# Initialize a Dataset.
dataset_all = Dataset()
# Load predefined dataset "MAO".
dataset_all.load_predefined_dataset(ds_name)
# Remove irrelevant labels.
dataset_all.remove_labels(**irrelevant_labels)
# Split the whole dataset according to the classification targets.
datasets = split_dataset_by_target(dataset_all)
# Get the first class of graphs, whose median preimage will be computed.
dataset = datasets[0]
len(dataset.graphs)

"""**2. Set parameters.**"""

import multiprocessing

# Parameters for MedianPreimageGenerator (our method).
mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting.
'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs.
'ds_name': ds_name, # name of the dataset.
'parallel': True, # whether the parallel scheme is to be used.
'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit.
'max_itrs': 100, # maximum iteration limit to optimize edit costs. If set to 0 then no limit.
'max_itrs_without_update': 3, # If the edit costs are not updated for more than this number of iterations, the optimization stops.
'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
'verbose': 2 # whether to print out results.
}
# Parameters for graph kernel computation.
kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
'depth': 9,
'k_func': 'MinMax',
'compute_method': 'trie',
'parallel': 'imap_unordered', # or None
'n_jobs': multiprocessing.cpu_count(),
'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
'verbose': 2 # whether to print out results.
}
# Parameters for GED computation.
ged_options = {'method': 'IPFP', # use IPFP heuristic.
'initialization_method': 'RANDOM', # or 'NODE', etc.
'initial_solutions': 10, # when greater than 1, the method is considered mIPFP.
'edit_cost': 'CONSTANT', # use CONSTANT cost.
'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed as the Euclidean distance.
'ratio_runs_from_initial_solutions': 1,
'threads': multiprocessing.cpu_count(), # number of parallel threads. Does not work if mpg_options['parallel'] = False.
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
# Parameters for MedianGraphEstimator (Boria's method).
mge_options = {'init_type': 'MEDOID', # how to initialize the median (i.e., compute the set-median). "MEDOID" uses the graph with the smallest SOD.
'random_inits': 10, # number of random initializations when 'init_type' = 'RANDOM'.
'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
'verbose': 2, # whether to print out results.
'refine': False # whether to refine the final SODs or not.
}
print('done.')

"""**3. Run median preimage generator.**"""

from gklearn.preimage import MedianPreimageGenerator

# Create median preimage generator instance.
mpg = MedianPreimageGenerator()
# Add dataset.
mpg.dataset = dataset
# Set parameters.
mpg.set_options(**mpg_options.copy())
mpg.kernel_options = kernel_options.copy()
mpg.ged_options = ged_options.copy()
mpg.mge_options = mge_options.copy()
# Run.
mpg.run()

"""**4. Get results.**"""

# Get results.
import pprint
pp = pprint.PrettyPrinter(indent=4) # pretty print
results = mpg.get_results()
pp.pprint(results)

# Draw generated graphs.
def draw_graph(graph):
import matplotlib.pyplot as plt
import networkx as nx
plt.figure()
pos = nx.spring_layout(graph)
nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
plt.show()
plt.clf()
plt.close()
draw_graph(mpg.set_median)
draw_graph(mpg.gen_median)

gklearn/kernels/__init__.py (+2 -1)

@@ -8,8 +8,9 @@ __author__ = "Linlin Jia"
__date__ = "November 2018"

from gklearn.kernels.graph_kernel import GraphKernel
from gklearn.kernels.structural_sp import StructuralSP
from gklearn.kernels.marginalized import Marginalized
from gklearn.kernels.shortest_path import ShortestPath
from gklearn.kernels.structural_sp import StructuralSP
from gklearn.kernels.path_up_to_h import PathUpToH
from gklearn.kernels.treelet import Treelet
from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman, WLSubtree

gklearn/kernels/marginalized.py (+338 -0)

@@ -0,0 +1,338 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 3 22:22:57 2020

@author: ljia

@references:

[1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between
labeled graphs. In Proceedings of the 20th International Conference on
Machine Learning, Washington, DC, United States, 2003.

[2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and
Jean-Philippe Vert. Extensions of marginalized graph kernels. In
Proceedings of the twenty-first international conference on Machine
learning, page 70. ACM, 2004.
"""

import sys
from multiprocessing import Pool
from tqdm import tqdm
import numpy as np
import networkx as nx
from gklearn.utils import SpecialLabel
from gklearn.utils.kernels import deltakernel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import untotterTransformation
from gklearn.kernels import GraphKernel


class Marginalized(GraphKernel):
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__p_quit = kwargs.get('p_quit', 0.5)
self.__n_iteration = kwargs.get('n_iteration', 10)
self.__remove_totters = kwargs.get('remove_totters', False)
self.__ds_infos = kwargs.get('ds_infos', {})
self.__n_iteration = int(self.__n_iteration)


def _compute_gm_series(self):
self.__add_dummy_labels(self._graphs)
if self.__remove_totters:
if self._verbose >= 2:
iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout)
else:
iterator = self._graphs
# @todo: this may not work.
self._graphs = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
else:
iterator = itr
for i, j in iterator:
kernel = self.__kernel_do(self._graphs[i], self._graphs[j])
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel # @todo: no directed graph considered?
return gram_matrix
def _compute_gm_imap_unordered(self):
self.__add_dummy_labels(self._graphs)
if self.__remove_totters:
pool = Pool(self._n_jobs)
itr = range(0, len(self._graphs))
if len(self._graphs) < 100 * self._n_jobs:
chunksize = int(len(self._graphs) / self._n_jobs) + 1
else:
chunksize = 100
remove_fun = self._wrapper_untotter
if self._verbose >= 2:
iterator = tqdm(pool.imap_unordered(remove_fun, itr, chunksize),
desc='removing tottering', file=sys.stdout)
else:
iterator = pool.imap_unordered(remove_fun, itr, chunksize)
for i, g in iterator:
self._graphs[i] = g
pool.close()
pool.join()
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_fun = self._wrapper_kernel_do
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
return gram_matrix
def _compute_kernel_list_series(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
if self.__remove_totters:
g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
if self._verbose >= 2:
iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
else:
iterator = g_list
# @todo: this may not work.
g_list = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
# compute kernel list.
kernel_list = [None] * len(g_list)
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
else:
iterator = range(len(g_list))
for i in iterator:
kernel = self.__kernel_do(g1, g_list[i])
kernel_list[i] = kernel
return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
if self.__remove_totters:
g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
pool = Pool(self._n_jobs)
itr = range(0, len(g_list))
if len(g_list) < 100 * self._n_jobs:
chunksize = int(len(g_list) / self._n_jobs) + 1
else:
chunksize = 100
remove_fun = self._wrapper_untotter
if self._verbose >= 2:
iterator = tqdm(pool.imap_unordered(remove_fun, itr, chunksize),
desc='removing tottering', file=sys.stdout)
else:
iterator = pool.imap_unordered(remove_fun, itr, chunksize)
for i, g in iterator:
g_list[i] = g
pool.close()
pool.join()
# compute kernel list.
kernel_list = [None] * len(g_list)

def init_worker(g1_toshare, g_list_toshare):
global G_g1, G_g_list
G_g1 = g1_toshare
G_g_list = g_list_toshare
do_fun = self._wrapper_kernel_list_do
def func_assign(result, var_to_assign):
var_to_assign[result[0]] = result[1]
itr = range(len(g_list))
len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
return kernel_list
def _wrapper_kernel_list_do(self, itr):
return itr, self.__kernel_do(G_g1, G_g_list[itr])
def _compute_single_kernel_series(self, g1, g2):
self.__add_dummy_labels([g1] + [g2])
if self.__remove_totters:
g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
g2 = untotterTransformation(g2, self.__node_label, self.__edge_label)
kernel = self.__kernel_do(g1, g2)
return kernel
def __kernel_do(self, g1, g2):
"""Calculate marginalized graph kernel between 2 graphs.
Parameters
----------
g1, g2 : NetworkX graphs
2 graphs between which the kernel is calculated.
Return
------
kernel : float
Marginalized kernel between 2 graphs.
"""
# init parameters
kernel = 0
num_nodes_G1 = nx.number_of_nodes(g1)
num_nodes_G2 = nx.number_of_nodes(g2)
# the initial probability distribution in the random walks generating step
# (uniform distribution over |G|)
p_init_G1 = 1 / num_nodes_G1
p_init_G2 = 1 / num_nodes_G2
q = self.__p_quit * self.__p_quit
r1 = q
# # initial R_inf
# # matrix to save all the R_inf for all pairs of nodes
# R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
#
# # calculate R_inf with a simple interative method
# for i in range(1, n_iteration):
# R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
# R_inf_new.fill(r1)
#
# # calculate R_inf for each pair of nodes
# for node1 in g1.nodes(data=True):
# neighbor_n1 = g1[node1[0]]
# # the transition probability distribution in the random walks
# # generating step (uniform distribution over the vertices adjacent
# # to the current vertex)
# if len(neighbor_n1) > 0:
# p_trans_n1 = (1 - p_quit) / len(neighbor_n1)
# for node2 in g2.nodes(data=True):
# neighbor_n2 = g2[node2[0]]
# if len(neighbor_n2) > 0:
# p_trans_n2 = (1 - p_quit) / len(neighbor_n2)
#
# for neighbor1 in neighbor_n1:
# for neighbor2 in neighbor_n2:
# t = p_trans_n1 * p_trans_n2 * \
# deltakernel(g1.node[neighbor1][node_label],
# g2.node[neighbor2][node_label]) * \
# deltakernel(
# neighbor_n1[neighbor1][edge_label],
# neighbor_n2[neighbor2][edge_label])
#
# R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][
# neighbor2] # ref [1] equation (8)
# R_inf[:] = R_inf_new
#
# # add elements of R_inf up and calculate kernel
# for node1 in g1.nodes(data=True):
# for node2 in g2.nodes(data=True):
# s = p_init_G1 * p_init_G2 * deltakernel(
# node1[1][node_label], node2[1][node_label])
# kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6)
R_inf = {} # dict to save all the R_inf for all pairs of nodes
# initial R_inf, the 1st iteration.
for node1 in g1.nodes():
for node2 in g2.nodes():
# R_inf[(node1[0], node2[0])] = r1
if len(g1[node1]) > 0:
if len(g2[node2]) > 0:
R_inf[(node1, node2)] = r1
else:
R_inf[(node1, node2)] = self.__p_quit
else:
if len(g2[node2]) > 0:
R_inf[(node1, node2)] = self.__p_quit
else:
R_inf[(node1, node2)] = 1
# compute all transition probability first.
t_dict = {}
if self.__n_iteration > 1:
for node1 in g1.nodes():
neighbor_n1 = g1[node1]
# the transition probability distribution in the random walks
# generating step (uniform distribution over the vertices adjacent
# to the current vertex)
if len(neighbor_n1) > 0:
p_trans_n1 = (1 - self.__p_quit) / len(neighbor_n1)
for node2 in g2.nodes():
neighbor_n2 = g2[node2]
if len(neighbor_n2) > 0:
p_trans_n2 = (1 - self.__p_quit) / len(neighbor_n2)
for neighbor1 in neighbor_n1:
for neighbor2 in neighbor_n2:
t_dict[(node1, node2, neighbor1, neighbor2)] = \
p_trans_n1 * p_trans_n2 * \
deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \
deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels))
# calculate R_inf with a simple iterative method
for i in range(2, self.__n_iteration + 1):
R_inf_old = R_inf.copy()
# calculate R_inf for each pair of nodes
for node1 in g1.nodes():
neighbor_n1 = g1[node1]
# the transition probability distribution in the random walks
# generating step (uniform distribution over the vertices adjacent
# to the current vertex)
if len(neighbor_n1) > 0:
for node2 in g2.nodes():
neighbor_n2 = g2[node2]
if len(neighbor_n2) > 0:
R_inf[(node1, node2)] = r1
for neighbor1 in neighbor_n1:
for neighbor2 in neighbor_n2:
R_inf[(node1, node2)] += \
(t_dict[(node1, node2, neighbor1, neighbor2)] * \
R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8)
# add elements of R_inf up and calculate kernel
for (n1, n2), value in R_inf.items():
s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels))
kernel += s * value # ref [1] equation (6)
return kernel
def _wrapper_kernel_do(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__kernel_do(G_gn[i], G_gn[j])

def _wrapper_untotter(self, i):
return i, untotterTransformation(self._graphs[i], self.__node_label, self.__edge_label) # @todo: this may not work.
def __add_dummy_labels(self, Gn):
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]

gklearn/kernels/treelet.py (+1 -1)

@@ -195,7 +195,7 @@ class Treelet(GraphKernel):
Return
------
kernel : float
Treelet Kernel between 2 graphs.
Treelet kernel between 2 graphs.
"""
keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs
vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys])


gklearn/preimage/__init__.py (+2 -0)

@@ -12,4 +12,6 @@ __date__ = "March 2020"

from gklearn.preimage.preimage_generator import PreimageGenerator
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator
from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator
from gklearn.preimage.kernel_knn_cv import kernel_knn_cv
from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class

gklearn/preimage/experiments/xp_random_preimage.py (+1192 -0)
File diff suppressed because it is too large


gklearn/preimage/experiments/xp_random_preimage_generation.py (+262 -0)

@@ -0,0 +1,262 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 1 11:37:57 2020

@author: ljia
"""
import multiprocessing
import numpy as np
import networkx as nx
import os
from gklearn.utils.graphfiles import saveGXL
from gklearn.preimage import RandomPreimageGenerator
from gklearn.utils import Dataset


dir_root = '../results/xp_random_preimage_generation/'


def xp_random_preimage_generation(kernel_name):
"""
Experiment similar to the one in Bakir's paper. A test to check if RandomPreimageGenerator class works correctly.

Returns
-------
None.

"""
alpha1_list = np.linspace(0, 1, 11)
k_dis_datasets = []
k_dis_preimages = []
preimages = []
bests_from_dataset = []
for alpha1 in alpha1_list:
print('alpha1 =', alpha1, ':\n')
# set parameters.
ds_name = 'MUTAG'
rpg_options = {'k': 5,
'r_max': 10, #
'l': 500,
'alphas': None,
'parallel': True,
'verbose': 2}
if kernel_name == 'PathUpToH':
kernel_options = {'name': 'PathUpToH',
'depth': 2, #
'k_func': 'MinMax', #
'compute_method': 'trie',
'parallel': 'imap_unordered',
# 'parallel': None,
'n_jobs': multiprocessing.cpu_count(),
'normalize': True,
'verbose': 0}
elif kernel_name == 'Marginalized':
kernel_options = {'name': 'Marginalized',
'p_quit': 0.8, #
'n_iteration': 7, #
'remove_totters': False,
'parallel': 'imap_unordered',
# 'parallel': None,
'n_jobs': multiprocessing.cpu_count(),
'normalize': True,
'verbose': 0}
edge_required = True
irrelevant_labels = {'edge_labels': ['label_0']}
cut_range = None
# create/get Gram matrix.
dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/'
if not os.path.exists(dir_save):
os.makedirs(dir_save)
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
if gmfile_exist:
gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
gram_matrix_unnorm = gmfile['gram_matrix_unnorm']
time_precompute_gm = gmfile['run_time']
# 1. get dataset.
print('1. getting dataset...')
dataset_all = Dataset()
dataset_all.load_predefined_dataset(ds_name)
dataset_all.trim_dataset(edge_required=edge_required)
if irrelevant_labels is not None:
dataset_all.remove_labels(**irrelevant_labels)
if cut_range is not None:
dataset_all.cut_graphs(cut_range)
# # add two "random" graphs.
# g1 = nx.Graph()
# g1.add_nodes_from(range(0, 16), label_0='0')
# g1.add_nodes_from(range(16, 25), label_0='1')
# g1.add_node(25, label_0='2')
# g1.add_nodes_from([26, 27], label_0='3')
# g1.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (5, 0), (4, 9), (12, 3), (10, 13), (13, 14), (14, 15), (15, 8), (0, 16), (1, 17), (2, 18), (12, 19), (11, 20), (13, 21), (15, 22), (7, 23), (6, 24), (14, 25), (25, 26), (25, 27)])
# g2 = nx.Graph()
# g2.add_nodes_from(range(0, 12), label_0='0')
# g2.add_nodes_from(range(12, 19), label_0='1')
# g2.add_nodes_from([19, 20, 21], label_0='2')
# g2.add_nodes_from([22, 23], label_0='3')
# g2.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 19), (19, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 20), (20, 7), (5, 0), (4, 8), (0, 12), (1, 13), (2, 14), (9, 15), (10, 16), (11, 17), (6, 18), (3, 21), (21, 22), (21, 23)])
# dataset_all.load_graphs([g1, g2] + dataset_all.graphs, targets=None)
# 2. initialize rpg and setting parameters.
print('2. initializing rpg and setting parameters...')
# nb_graphs = len(dataset_all.graphs) - 2
# rpg_options['alphas'] = [alpha1, 1 - alpha1] + [0] * nb_graphs
nb_graphs = len(dataset_all.graphs)
alphas = [0] * nb_graphs
alphas[1] = alpha1
alphas[6] = 1 - alpha1
rpg_options['alphas'] = alphas
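# With this setting, the target point in kernel space is the convex combination
# alpha1 * Phi(G_1) + (1 - alpha1) * Phi(G_6), where G_1 and G_6 are the graphs at
# indices 1 and 6 of dataset_all.graphs; all other weights are zero.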
if gmfile_exist:
rpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm
rpg_options['runtime_precompute_gm'] = time_precompute_gm
rpg = RandomPreimageGenerator()
rpg.dataset = dataset_all
rpg.set_options(**rpg_options.copy())
rpg.kernel_options = kernel_options.copy()
# 3. compute preimage.
print('3. computing preimage...')
rpg.run()
results = rpg.get_results()
k_dis_datasets.append(results['k_dis_dataset'])
k_dis_preimages.append(results['k_dis_preimage'])
bests_from_dataset.append(rpg.best_from_dataset)
preimages.append(rpg.preimage)
# 4. save results.
# write Gram matrices to file.
if not gmfile_exist:
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=rpg.gram_matrix_unnorm, run_time=results['runtime_precompute_gm'])
# save graphs.
fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3]
saveGXL(rpg.best_from_dataset, fn_best_dataset + '.gxl', method='default',
node_labels=dataset_all.node_labels, edge_labels=dataset_all.edge_labels,
node_attrs=dataset_all.node_attrs, edge_attrs=dataset_all.edge_attrs)
fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3]
saveGXL(rpg.preimage, fn_preimage + '.gxl', method='default',
node_labels=dataset_all.node_labels, edge_labels=dataset_all.edge_labels,
node_attrs=dataset_all.node_attrs, edge_attrs=dataset_all.edge_attrs)
# draw graphs.
__draw_graph(rpg.best_from_dataset, fn_best_dataset)
__draw_graph(rpg.preimage, fn_preimage)
# save distances.
np.savez(dir_save + 'distances.' + ds_name + '.' + kernel_options['name'], k_dis_datasets=k_dis_datasets, k_dis_preimages=k_dis_preimages)

# plot results figure.
__plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save)
print('\ncomplete.\n')
return k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages


def __draw_graph(graph, file_prefix):
# import matplotlib
# matplotlib.use('agg')
import matplotlib.pyplot as plt
plt.figure()
pos = nx.spring_layout(graph)
nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'label_0'), font_color='w', width=3, with_labels=True)
plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
# plt.show()
plt.clf()
plt.close()


def __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save):
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1, figsize=(7, 4.5))

ind = np.arange(len(alpha1_list)) # the x locations for the groups
width = 0.35 # the width of the bars: can also be len(x) sequence
ax.bar(ind, k_dis_preimages, width, label='Reconstructed pre-image', zorder=3, color='#133AAC')
ax.set_xlabel(r'$\alpha \in [0,1]$')
ax.set_ylabel(r'$d(g_i,g^\star(\alpha))$')
#ax.set_title('Runtime of the shortest path kernel on all datasets')
plt.xticks(ind, [str(i)[0:3] for i in alpha1_list])
#ax.set_yticks(np.logspace(-16, -3, num=20, base=10))
#ax.set_ylim(bottom=1e-15)
ax.grid(axis='y', zorder=0)
ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.xaxis.set_ticks_position('none')

ax.plot(ind, k_dis_datasets, 'b.-', label=r'Nearest neighbor in $D_N$', color='orange', zorder=4)
ax.yaxis.set_ticks_position('none')
fig.subplots_adjust(bottom=.2)
fig.legend(loc='lower center', ncol=2, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6)
plt.savefig(dir_save + 'distances in kernel space.eps', format='eps', dpi=300,
transparent=True, bbox_inches='tight')
plt.show()
plt.clf()
plt.close()

if __name__ == '__main__':
# kernel_name = 'PathUpToH'
kernel_name = 'Marginalized'
k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages = xp_random_preimage_generation(kernel_name)
# # save graphs.
# dir_save = dir_root + 'MUTAG.PathUpToH/'
# for i, alpha1 in enumerate(np.linspace(0, 1, 11)):
# fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3]
# saveGXL(bests_from_dataset[i], fn_best_dataset + '.gxl', method='default',
# node_labels=['label_0'], edge_labels=[],
# node_attrs=[], edge_attrs=[])
# fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3]
# saveGXL(preimages[i], fn_preimage + '.gxl', method='default',
# node_labels=['label_0'], edge_labels=[],
# node_attrs=[], edge_attrs=[])

# # draw graphs.
# dir_save = dir_root + 'MUTAG.PathUpToH/'
# for i, alpha1 in enumerate(np.linspace(0, 1, 11)):
# fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3]
# __draw_graph(bests_from_dataset[i], fn_best_dataset)
# fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3]
# __draw_graph(preimages[i], fn_preimage)

# # plot results figure.
# alpha1_list = np.linspace(0, 1, 11)
# dir_save = dir_root + 'MUTAG.PathUpToH/'
# __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save)

# k_dis_datasets = [0.0,
# 0.08882515554098754,
# 0.17765031108197632,
# 0.2664754666229643,
# 0.35530062216395264,
# 0.44412577770494066,
# 0.35530062216395236,
# 0.2664754666229643,
# 0.17765031108197632,
# 0.08882515554098878,
# 0.0]

# k_dis_preimages = [0.0,
# 0.08882515554098754,
# 0.17765031108197632,
# 0.2664754666229643,
# 0.35530062216395264,
# 0.44412577770494066,
# 0.35530062216395236,
# 0.2664754666229643,
# 0.17765031108197632,
# 0.08882515554098878,
# 0.0]

gklearn/preimage/experiments/xp_simple_preimage.py (+176 -0)

@@ -0,0 +1,176 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 12 10:30:17 2020

@author: ljia

This script constructs simple preimages to test preimage methods and find bugs and shortcomings in them.
"""


def xp_simple_preimage():
import numpy as np
"""**1. Get dataset.**"""

from gklearn.utils import Dataset, split_dataset_by_target
# Predefined dataset name, use dataset "MAO".
ds_name = 'MAO'
# The node/edge labels that will not be used in the computation.
irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
# Initialize a Dataset.
dataset_all = Dataset()
# Load predefined dataset "MAO".
dataset_all.load_predefined_dataset(ds_name)
# Remove irrelevant labels.
dataset_all.remove_labels(**irrelevant_labels)
# Split the whole dataset according to the classification targets.
datasets = split_dataset_by_target(dataset_all)
# Get the first class of graphs, whose median preimage will be computed.
dataset = datasets[0]
len(dataset.graphs)
"""**2. Set parameters.**"""
import multiprocessing
# Parameters for MedianPreimageGenerator (our method).
mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting.
'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs.
'ds_name': ds_name, # name of the dataset.
'parallel': True, # whether the parallel scheme is to be used.
'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit.
'max_itrs': 10, # maximum iteration limit to optimize edit costs. If set to 0 then no limit.
'max_itrs_without_update': 3, # If the edit costs are not updated for more than this number of iterations, the optimization stops.
'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
'verbose': 2 # whether to print out results.
}
# Parameters for graph kernel computation.
kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
'depth': 9,
'k_func': 'MinMax',
'compute_method': 'trie',
'parallel': 'imap_unordered', # or None
'n_jobs': multiprocessing.cpu_count(),
'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
'verbose': 2 # whether to print out results.
}
# Parameters for GED computation.
ged_options = {'method': 'IPFP', # use IPFP heuristic.
'initialization_method': 'RANDOM', # or 'NODE', etc.
'initial_solutions': 10, # when greater than 1, the method is considered mIPFP.
'edit_cost': 'CONSTANT', # use CONSTANT cost.
'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed as the Euclidean distance.
'ratio_runs_from_initial_solutions': 1,
'threads': multiprocessing.cpu_count(), # number of parallel threads. Does not work if mpg_options['parallel'] = False.
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
# Parameters for MedianGraphEstimator (Boria's method).
mge_options = {'init_type': 'MEDOID', # how to initialize the median (i.e., compute the set-median). "MEDOID" uses the graph with the smallest SOD.
'random_inits': 10, # number of random initializations when 'init_type' = 'RANDOM'.
'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
'verbose': 2, # whether to print out results.
'refine': False # whether to refine the final SODs or not.
}
print('done.')
"""**3. Compute the Gram matrix and distance matrix.**"""
from gklearn.utils.utils import get_graph_kernel_by_name
# Get a graph kernel instance.
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
kernel_options=kernel_options)
# Compute Gram matrix.
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
# Compute distance matrix.
from gklearn.utils import compute_distance_matrix
dis_mat, _, _, _ = compute_distance_matrix(gram_matrix)
print('done.')

"""**4. Find the candidate graph.**"""
from gklearn.preimage.utils import compute_k_dis
# Number of the nearest neighbors.
k_neighbors = 10
# For each graph G in dataset, compute the distance between its image \Phi(G) and the mean of its neighbors' images.
dis_min = np.inf # the minimum distance between possible \Phi(G) and the mean of its neighbors.
for idx, G in enumerate(dataset.graphs):
# Find the k nearest neighbors of G.
dis_list = dis_mat[idx] # distances between \Phi(G) and the image of each graph.
idx_sort = np.argsort(dis_list) # sort distances and get the sorted indices.
idx_nearest = idx_sort[1:k_neighbors+1] # indices of the k-nearest neighbors.
dis_k_nearest = [dis_list[i] for i in idx_nearest] # the k nearest distances, excluding the zero distance from G to itself.
G_k_nearest = [dataset.graphs[i] for i in idx_nearest] # k-nearest neighbors.
# Compute the distance between \Phi(G) and the mean of its neighbors.
dis_tmp = compute_k_dis(idx, # the index of G in Gram matrix.
idx_nearest, # the indices of the neighbors
[1 / k_neighbors] * k_neighbors, # coefficients for neighbors.
gram_matrix,
withterm3=False)
# Check if the new distance is smaller.
if dis_tmp < dis_min:
dis_min = dis_tmp
G_cand = G
G_neighbors = G_k_nearest
print('The minimum distance is', dis_min)
"""**5. Run median preimage generator.**"""
from gklearn.preimage import MedianPreimageGenerator
# Set the dataset as the k-nearest neighbors.
dataset.load_graphs(G_neighbors)
# Create median preimage generator instance.
mpg = MedianPreimageGenerator()
# Add dataset.
mpg.dataset = dataset
# Set parameters.
mpg.set_options(**mpg_options.copy())
mpg.kernel_options = kernel_options.copy()
mpg.ged_options = ged_options.copy()
mpg.mge_options = mge_options.copy()
# Run.
mpg.run()
"""**4. Get results.**"""
# Get results.
import pprint
pp = pprint.PrettyPrinter(indent=4) # pretty print
results = mpg.get_results()
pp.pprint(results)
draw_graph(mpg.set_median)
draw_graph(mpg.gen_median)
draw_graph(G_cand)


# Draw generated graphs.
def draw_graph(graph):
import matplotlib.pyplot as plt
import networkx as nx
plt.figure()
pos = nx.spring_layout(graph)
nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
plt.show()
plt.clf()
plt.close()


if __name__ == '__main__':
xp_simple_preimage()

gklearn/preimage/generate_random_preimages_by_class.py (+188 -0)

@@ -0,0 +1,188 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 1 17:02:51 2020

@author: ljia
"""

import numpy as np
from gklearn.utils import Dataset
import csv
import os
import os.path
from gklearn.preimage import RandomPreimageGenerator
from gklearn.utils import split_dataset_by_target
from gklearn.utils.graphfiles import saveGXL


def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=True, save_preimages=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None):
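"""Generate a random preimage for each target class of the dataset `ds_name`.

A brief parameter sketch, inferred from the code below: `rpg_options` and
`kernel_options` are passed to RandomPreimageGenerator and to the graph kernel,
respectively. `load_gm` controls the unnormalized Gram matrices: 'auto' loads them
from `dir_save` if the saved file exists and computes them otherwise; a truthy value
always loads; a falsy value always recomputes. CSV summaries are written when
`save_results` is set, preimages are saved as GXL files when `save_preimages` is set,
and newly computed Gram matrices are written back to `dir_save`.
"""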
# 1. get dataset.
print('1. getting dataset...')
dataset_all = Dataset()
dataset_all.load_predefined_dataset(ds_name)
dataset_all.trim_dataset(edge_required=edge_required)
if irrelevant_labels is not None:
dataset_all.remove_labels(**irrelevant_labels)
if cut_range is not None:
dataset_all.cut_graphs(cut_range)
datasets = split_dataset_by_target(dataset_all)

if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], dir_save)

dis_k_dataset_list = []
dis_k_preimage_list = []
time_precompute_gm_list = []
time_generate_list = []
time_total_list = []
itrs_list = []
num_updates_list = []
if load_gm == 'auto':
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
if gmfile_exist:
gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']]
time_precompute_gm_list = gmfile['run_time_list'].tolist()
else:
gram_matrix_unnorm_list = []
time_precompute_gm_list = []
elif not load_gm:
gram_matrix_unnorm_list = []
time_precompute_gm_list = []
else:
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']]
time_precompute_gm_list = gmfile['run_time_list'].tolist()
print('start generating preimages for each class of target...')
idx_offset = 0
for idx, dataset in enumerate(datasets):
target = dataset.targets[0]
print('\ntarget =', target, '\n')
# if target != 1:
# continue
num_graphs = len(dataset.graphs)
if num_graphs < 2:
print('\nnumber of graphs = ', num_graphs, ', skip.\n')
idx_offset += 1
continue
# 2. set parameters.
print('2. initializing rpg and setting parameters...')
if load_gm:
if gmfile_exist:
rpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx - idx_offset]
rpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx - idx_offset]
rpg = RandomPreimageGenerator()
rpg.dataset = dataset
rpg.set_options(**rpg_options.copy())
rpg.kernel_options = kernel_options.copy()

# 3. compute preimage.
print('3. computing preimage...')
rpg.run()
results = rpg.get_results()
# 4. save results (and median graphs).
print('4. saving results (and preimages)...')
# write result detail.
if save_results:
print('writing results to files...')

f_detail = open(dir_save + fn_output_detail, 'a')
csv.writer(f_detail).writerow([ds_name, kernel_options['name'],
num_graphs, target, 1,
results['k_dis_dataset'], results['k_dis_preimage'],
results['runtime_precompute_gm'],
results['runtime_generate_preimage'], results['runtime_total'],
results['itrs'], results['num_updates']])
f_detail.close()
# compute result summary.
dis_k_dataset_list.append(results['k_dis_dataset'])
dis_k_preimage_list.append(results['k_dis_preimage'])
time_precompute_gm_list.append(results['runtime_precompute_gm'])
time_generate_list.append(results['runtime_generate_preimage'])
time_total_list.append(results['runtime_total'])
itrs_list.append(results['itrs'])
num_updates_list.append(results['num_updates'])
# write result summary for each target.
f_summary = open(dir_save + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, kernel_options['name'],
num_graphs, target,
results['k_dis_dataset'], results['k_dis_preimage'],
results['runtime_precompute_gm'],
results['runtime_generate_preimage'], results['runtime_total'],
results['itrs'], results['num_updates']])
f_summary.close()
# save preimages.
if save_preimages:
if not os.path.exists(dir_save + 'preimages/'):
os.makedirs(dir_save + 'preimages/')
print('Saving preimages to files...')
fn_best_dataset = dir_save + 'preimages/g_best_dataset.' + 'nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
saveGXL(rpg.best_from_dataset, fn_best_dataset + '.gxl', method='default',
node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
fn_preimage = dir_save + 'preimages/g_preimage.' + 'nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
saveGXL(rpg.preimage, fn_preimage + '.gxl', method='default',
node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
gram_matrix_unnorm_list.append(rpg.gram_matrix_unnorm)

# write result summary for each class.
if save_results:
dis_k_dataset_mean = np.mean(dis_k_dataset_list)
dis_k_preimage_mean = np.mean(dis_k_preimage_list)
time_precompute_gm_mean = np.mean(time_precompute_gm_list)
time_generate_mean = np.mean(time_generate_list)
time_total_mean = np.mean(time_total_list)
itrs_mean = np.mean(itrs_list)
num_updates_mean = np.mean(num_updates_list)
f_summary = open(dir_save + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, kernel_options['name'],
num_graphs, 'all',
dis_k_dataset_mean, dis_k_preimage_mean,
time_precompute_gm_mean,
time_generate_mean, time_total_mean, itrs_mean,
num_updates_mean])
f_summary.close()
# write Gram matrices to file.
if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)

print('\ncomplete.\n')

def __init_output_file_preimage(ds_name, gkernel, dir_output):
if not os.path.exists(dir_output):
os.makedirs(dir_output)
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'num graphs',
'target', 'repeat', 'dis_k best from dataset', 'dis_k preimage',
'time precompute gm', 'time generate preimage', 'time total',
'itrs', 'num updates'])
f_detail.close()
fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'num graphs',
'target', 'dis_k best from dataset', 'dis_k preimage',
'time precompute gm', 'time generate preimage', 'time total',
'itrs', 'num updates'])
f_summary.close()
return fn_output_detail, fn_output_summary

gklearn/preimage/median_preimage_generator.py (+28 -30)

@@ -19,7 +19,7 @@ from gklearn.ged.median import constant_node_costs,mge_options_to_string
from gklearn.gedlib import librariesImport, gedlibpy
from gklearn.utils import Timer
from gklearn.utils.utils import get_graph_kernel_by_name
# from gklearn.utils.dataset import Dataset

class MedianPreimageGenerator(PreimageGenerator):
@@ -127,8 +127,7 @@ class MedianPreimageGenerator(PreimageGenerator):
# 3. compute set median and gen median using optimized edit costs.
if self._verbose >= 2:
print('\nstart computing set median and gen median using optimized edit costs...\n')
# group_fnames = [Gn[g].graph['filename'] for g in group_min]
self.__generate_preimage_iam()
self.__gmg_bcu()
end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec
self.__runtime_total = end_generate_preimage - start
@@ -140,19 +139,13 @@ class MedianPreimageGenerator(PreimageGenerator):
# 4. compute kernel distances to the true median.
if self._verbose >= 2:
print('\nstart computing distances to true median....\n')
# Gn_median = [Gn[g].copy() for g in group_min]
self.__compute_distances_to_true_median()
# dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min =
# idx_dis_k_gi_min = group_min[idx_dis_k_gi_min]
# print('index min dis_k_gi:', idx_dis_k_gi_min)
# print('sod_sm:', sod_sm)
# print('sod_gm:', sod_gm)

# 5. print out results.
if self._verbose:
print()
print('================================================================================')
print('Finished generalization of preimages.')
print('Finished generation of preimages.')
print('--------------------------------------------------------------------------------')
print('The optimized edit cost constants:', self.__edit_cost_constants)
print('SOD of the set median:', self.__sod_set_median)
@@ -169,11 +162,6 @@ class MedianPreimageGenerator(PreimageGenerator):
print('Is optimization of edit costs converged:', self.__converged)
print('================================================================================')
print()
# collect return values.
# return (sod_sm, sod_gm), \
# (dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min), \
# (time_fitting, time_generating)


def get_results(self):
@@ -203,20 +191,22 @@ class MedianPreimageGenerator(PreimageGenerator):
"""
if self.__fit_method == 'random': # random
if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = random.sample(range(1, 10), 3)
self.__edit_cost_constants = [item * 0.1 for item in self.__edit_cost_constants]
self.__edit_cost_constants = random.sample(range(1, 1000), 3)
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
elif self.__ged_options['edit_cost'] == 'LETTER2':
random.seed(time.time())
self.__edit_cost_constants = random.sample(range(1, 10), 5)
# self.__edit_cost_constants = [item * 0.1 for item in self.__edit_cost_constants]
self.__edit_cost_constants = random.sample(range(1, 1000), 5)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
self.__edit_cost_constants = random.sample(range(1, 10), 6)
self.__edit_cost_constants = random.sample(range(1, 1000), 6)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
if self._dataset.node_attrs == []:
self.__edit_cost_constants[2] = 0
if self._dataset.edge_attrs == []:
self.__edit_cost_constants[5] = 0
else:
self.__edit_cost_constants = random.sample(range(1, 10), 6)
self.__edit_cost_constants = random.sample(range(1, 1000), 6)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
if self._verbose >= 2:
print('edit cost constants used:', self.__edit_cost_constants)
elif self.__fit_method == 'expert': # expert
@@ -861,7 +851,15 @@ class MedianPreimageGenerator(PreimageGenerator):
print()

def __generate_preimage_iam(self):
def __gmg_bcu(self):
"""
The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG).

Returns
-------
None.

"""
# Set up the ged environment.
ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private variable.
# gedlibpy.restart_env()
@@ -910,24 +908,24 @@ class MedianPreimageGenerator(PreimageGenerator):
# compute distance in kernel space for set median.
kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options)
kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options)
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
if self._kernel_options['normalize']:
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
kernel_sm = 1
# @todo: not correct kernel value
gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_sm = np.concatenate((np.array([[1] + kernels_to_sm]).T, gram_with_sm), axis=1)
gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1)
self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_sm, withterm3=False)
# print(gen_median.nodes(data=True))
# print(gen_median.edges(data=True))
# print(set_median.nodes(data=True))
# print(set_median.edges(data=True))
# compute distance in kernel space for generalized median.
kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options)
kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options)
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
if self._kernel_options['normalize']:
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
kernel_gm = 1
gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_gm = np.concatenate((np.array([[1] + kernels_to_gm]).T, gram_with_gm), axis=1)
gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1)
self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_gm, withterm3=False)


gklearn/preimage/random_preimage_generator.py (+389 -0)

@@ -0,0 +1,389 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri May 29 14:29:52 2020

@author: ljia
"""

import numpy as np
import time
import sys
from tqdm import tqdm
import multiprocessing
import networkx as nx
from multiprocessing import Pool
from functools import partial
from gklearn.preimage import PreimageGenerator
from gklearn.preimage.utils import compute_k_dis
from gklearn.utils import Timer
from gklearn.utils.utils import get_graph_kernel_by_name
# from gklearn.utils.dataset import Dataset


class RandomPreimageGenerator(PreimageGenerator):
def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset)
# arguments to set.
self.__k = 5 # number of nearest neighbors of phi in D_N.
self.__r_max = 10 # maximum number of iterations.
self.__l = 500 # number of graphs generated for each graph in D_k U {g_i_hat}.
self.__alphas = None # weights of linear combinations of points in kernel space.
self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count()
self.__time_limit_in_sec = 0
self.__max_itrs = 20
# values to compute.
self.__runtime_generate_preimage = None
self.__runtime_total = None
self.__preimage = None
self.__best_from_dataset = None
self.__k_dis_preimage = None
self.__k_dis_dataset = None
self.__itrs = 0
self.__converged = False # @todo
self.__num_updates = 0
# values that can be set or to be computed.
self.__gram_matrix_unnorm = None
self.__runtime_precompute_gm = None

def set_options(self, **kwargs):
self._kernel_options = kwargs.get('kernel_options', {})
self._graph_kernel = kwargs.get('graph_kernel', None)
self._verbose = kwargs.get('verbose', 2)
self.__k = kwargs.get('k', 5)
self.__r_max = kwargs.get('r_max', 10)
self.__l = kwargs.get('l', 500)
self.__alphas = kwargs.get('alphas', None)
self.__parallel = kwargs.get('parallel', True)
self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self.__max_itrs = kwargs.get('max_itrs', 20)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
def run(self):
self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'],
node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels,
node_attrs=self._dataset.node_attrs,
edge_attrs=self._dataset.edge_attrs,
ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
kernel_options=self._kernel_options)
# record start time.
start = time.time()
# 1. precompute gram matrix.
if self.__gram_matrix_unnorm is None:
gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
end_precompute_gm = time.time()
self.__runtime_precompute_gm = end_precompute_gm - start
else:
if self.__runtime_precompute_gm is None:
raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
if self._kernel_options['normalize']:
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
else:
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm
# 2. compute k nearest neighbors of phi in D_N.
if self._verbose >= 2:
print('\nstart computing k nearest neighbors of phi in D_N...\n')
D_N = self._dataset.graphs
if self.__alphas is None:
self.__alphas = [1 / len(D_N)] * len(D_N)
k_dis_list = [] # distance between g_star and each graph.
term3 = 0
for i1, a1 in enumerate(self.__alphas):
for i2, a2 in enumerate(self.__alphas):
term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2]
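# term3 is the squared norm of the target point, ||sum_i alpha_i * Phi(G_i)||^2; it is
# identical for every graph in D_N, so it is computed once and passed to compute_k_dis.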
for idx in range(len(D_N)):
k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True))
# sort.
sort_idx = np.argsort(k_dis_list)
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]] # the k shortest distances.
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N
self.__best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple.
self.__k_dis_dataset = dis_gs[0]
if self.__k_dis_dataset == 0: # get the exact pre-image.
end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self.__runtime_total = end_generate_preimage - start
self.__preimage = self.__best_from_dataset.copy()
self.__k_dis_preimage = self.__k_dis_dataset
if self._verbose:
print()
print('=============================================================================')
print('The exact pre-image is found from the input dataset.')
print('-----------------------------------------------------------------------------')
print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset)
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('=============================================================================')
print()
return
dhat = dis_gs[0] # the nearest distance
Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]] # the k nearest neighbors
Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk]
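# relabel nodes as consecutive integers 0, ..., n-1 so that the flat index to node-pair mapping used in __do_trial addresses valid nodes.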
# 3. start iterations.
if self._verbose >= 2:
print('starting iterations...')
gihat_list = []
dihat_list = []
r = 0
dis_of_each_itr = [dhat]
if self.__parallel:
self._kernel_options['parallel'] = None
self.__itrs = 0
self.__num_updates = 0
timer = Timer(self.__time_limit_in_sec)
while not self.__termination_criterion_met(timer, self.__itrs, r):
print('\n- r =', r)
found = False
dis_bests = dis_gs + dihat_list
# compute numbers of edges to be inserted/deleted.
# @todo what if the log is negative? how to choose alpha (scalar)?
fdgs_list = np.array(dis_bests)
if np.min(fdgs_list) < 1: # in case the log is negative.
fdgs_list /= np.min(fdgs_list)
fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
if np.min(fdgs_list) < 1: # in case the log is smaller than 1.
fdgs_list = np.array(fdgs_list) + 1
# expand the number of modifications to increase the possibility.
nb_vpairs_list = [nx.number_of_nodes(g) * (nx.number_of_nodes(g) - 1) for g in (Gs_nearest + gihat_list)]
nb_vpairs_min = np.min(nb_vpairs_list)
idx_fdgs_max = np.argmax(fdgs_list)
fdgs_max_old = fdgs_list[idx_fdgs_max]
fdgs_max = fdgs_max_old
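# nb_modif counts the number of distinct sets of fdgs_max node pairs, i.e. the binomial coefficient C(nb_vpairs_min, fdgs_max); fdgs_max is then increased until this count reaches the number of random trials l, so the trials can draw enough distinct candidates.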
nb_modif = 1
for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)):
nb_modif *= nb / (fdgs_max - idx)
while fdgs_max < nb_vpairs_min and nb_modif < self.__l:
fdgs_max += 1
nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max
nb_increase = int(fdgs_max - fdgs_max_old)
if nb_increase > 0:
fdgs_list = np.array(fdgs_list) + 1 # use array addition; fdgs_list may still be a plain Python list at this point.
for ig, gs in enumerate(Gs_nearest + gihat_list):
if self._verbose >= 2:
print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list))
gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3)
if found:
r = 0
gihat_list = [gnew]
dihat_list = [dhat]
else:
r += 1
dis_of_each_itr.append(dhat)
self.__itrs += 1
if self._verbose >= 2:
print('Total number of iterations is', self.__itrs, '.')
print('The preimage is updated', self.__num_updates, 'times.')
print('The shortest distances for previous iterations are', dis_of_each_itr, '.')
# get results and print.
end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self.__runtime_total = end_generate_preimage - start
self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
self.__k_dis_preimage = dhat
if self._verbose:
print()
print('=============================================================================')
print('Finished generation of preimages.')
print('-----------------------------------------------------------------------------')
print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset)
print('Distance in kernel space for the preimage:', self.__k_dis_preimage)
print('Total number of iterations for optimizing:', self.__itrs)
print('Total number of preimage updates:', self.__num_updates)
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('=============================================================================')
print()

def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3):
if self.__parallel:
gnew, dhat, found = self.__generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3)
else:
gnew, dhat, found = self.__generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3)
return gnew, dhat, found

def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3):
gnew = None
updated = False
for trial in range(0, self.__l):
if self._verbose >= 2:
print('---', trial + 1, 'trial out of', self.__l)

gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial)

# get the better graph preimage.
if dnew <= dhat: # @todo: the new distance is smaller or also equal?
if dhat - dnew > 1e-6:
if self._verbose >= 2:
print('trial =', str(trial))
print('\nI am smaller!')
print('index (as in D_k U {gihat}) =', str(ig))
print('distance:', dhat, '->', dnew)
updated = True
else:
if self._verbose >= 2:
print('I am equal!')
dhat = dnew
gnew = gtemp.copy()
found = True # found better or equally good graph.
if updated:
self.__num_updates += 1
return gnew, dhat, found

def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3):
gnew = None
len_itr = self.__l
gnew_list = [None] * len_itr
dnew_list = [None] * len_itr
itr = range(0, len_itr)
n_jobs = multiprocessing.cpu_count()
if len_itr < 100 * n_jobs:
chunksize = int(len_itr / n_jobs) + 1
else:
chunksize = 100
do_fun = partial(self._generate_graph_parallel, g_init, fdgs, term3)
pool = Pool(processes=n_jobs)
if self._verbose >= 2:
iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize),
desc='Generating l graphs', file=sys.stdout)
else:
iterator = pool.imap_unordered(do_fun, itr, chunksize)
for idx, gnew, dnew in iterator:
gnew_list[idx] = gnew
dnew_list[idx] = dnew
pool.close()
pool.join()
# check if get the better graph preimage.
idx_min = np.argmin(dnew_list)
dnew = dnew_list[idx_min]
if dnew <= dhat: # @todo: the new distance is smaller or also equal?
if dhat - dnew > 1e-6: # @todo: use a proportion and watch out for 0.
if self._verbose >= 2:
print('I am smaller!')
print('index (as in D_k U {gihat}) =', str(ig))
print('distance:', dhat, '->', dnew, '\n')
self.__num_updates += 1
else:
if self._verbose >= 2:
print('I am equal!')
dhat = dnew
gnew = gnew_list[idx_min]
found = True # found better or equally good graph.
return gnew, dhat, found

def _generate_graph_parallel(self, g_init, fdgs, term3, itr):
trial = itr
gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial)
return trial, gtemp, dnew

def __do_trial(self, g_init, fdgs, term3, trial):
# add and delete edges.
gtemp = g_init.copy()
seed = (trial + int(time.time())) % (2 ** 32 - 1)
rdm_state = np.random.RandomState(seed=seed)
# which edges to change.
# @todo: should we use just half of the adjacency matrix for undirected graphs?
nb_vpairs = nx.number_of_nodes(g_init) * (nx.number_of_nodes(g_init) - 1)
# @todo: what if fdgs is bigger than nb_vpairs?
idx_change = rdm_state.randint(0, high=nb_vpairs, size=(fdgs if
fdgs < nb_vpairs else nb_vpairs))
# print(idx_change)
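# each index in [0, n*(n-1)) encodes an ordered node pair (node1, node2) with node2 != node1; the corresponding edge is toggled: added if absent, removed if present.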
for item in idx_change:
node1 = int(item / (nx.number_of_nodes(g_init) - 1))
node2 = (item - node1 * (nx.number_of_nodes(g_init) - 1))
if node2 >= node1: # skip the self pair.
node2 += 1
# @todo: is the randomness correct?
if not gtemp.has_edge(node1, node2):
gtemp.add_edge(node1, node2)
else:
gtemp.remove_edge(node1, node2)
# compute new distances.
kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options)
kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
if self._kernel_options['normalize']:
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize
kernel_gtmp = 1
# @todo: not correct kernel value
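# prepend the kernels to gtemp as row 0 and column 0 of the Gram matrix, so that compute_k_dis can address gtemp by index 0 and the dataset graphs by indices 1..N.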
gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)
return gtemp, dnew

def get_results(self):
results = {}
results['runtime_precompute_gm'] = self.__runtime_precompute_gm
results['runtime_generate_preimage'] = self.__runtime_generate_preimage
results['runtime_total'] = self.__runtime_total
results['k_dis_dataset'] = self.__k_dis_dataset
results['k_dis_preimage'] = self.__k_dis_preimage
results['itrs'] = self.__itrs
results['num_updates'] = self.__num_updates
return results


def __termination_criterion_met(self, timer, itr, r):
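# stop when the time limit expires or the maximum number of iterations is reached; otherwise stop when r (consecutive iterations without an update) reaches r_max. A negative max_itrs or r_max disables the corresponding check.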
if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
# if self.__state == AlgorithmState.TERMINATED:
# self.__state = AlgorithmState.INITIALIZED
return True
return (r >= self.__r_max if self.__r_max >= 0 else False)
# return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)

@property
def preimage(self):
return self.__preimage

@property
def best_from_dataset(self):
return self.__best_from_dataset

@property
def gram_matrix_unnorm(self):
return self.__gram_matrix_unnorm

@gram_matrix_unnorm.setter
def gram_matrix_unnorm(self, value):
self.__gram_matrix_unnorm = value

+ 5
- 5
gklearn/preimage/utils.py View File

@@ -256,7 +256,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
gram_matrix_unnorm_list.append(mpg.gram_matrix_unnorm)

# write result summary for each letter.
# write result summary for each class.
if save_results:
sod_sm_mean = np.mean(sod_sm_list)
sod_gm_mean = np.mean(sod_gm_list)
@@ -387,15 +387,15 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
return np.sqrt(term1 - term2 + term3)


def compute_k_dis(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
def compute_k_dis(idx_g, idx_gi, alphas, Kmatrix, term3=0, withterm3=True):
term1 = Kmatrix[idx_g, idx_g]
term2 = 0
for i, a in enumerate(alpha):
for i, a in enumerate(alphas):
term2 += a * Kmatrix[idx_g, idx_gi[i]]
term2 *= 2
if withterm3 == False:
for i1, a1 in enumerate(alpha):
for i2, a2 in enumerate(alpha):
for i1, a1 in enumerate(alphas):
for i2, a2 in enumerate(alphas):
term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
return np.sqrt(term1 - term2 + term3)
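
For reference, `compute_k_dis` returns the distance in kernel space between the image of $g$ and the weighted mean of the images of the graphs $g_i$: $d = \sqrt{k(g, g) - 2 \sum_i \alpha_i k(g, g_i) + \sum_{i_1, i_2} \alpha_{i_1} \alpha_{i_2} k(g_{i_1}, g_{i_2})}$; when `withterm3` is `True`, the last sum must be passed in pre-computed as `term3`.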



+ 7
- 1
gklearn/utils/utils.py View File

@@ -300,7 +300,13 @@ def get_edge_labels(Gn, edge_label):


def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}):
if name == 'ShortestPath':
if name == 'Marginalized':
from gklearn.kernels import Marginalized
graph_kernel = Marginalized(node_labels=node_labels,
edge_labels=edge_labels,
ds_infos=ds_infos,
**kernel_options)
elif name == 'ShortestPath':
from gklearn.kernels import ShortestPath
graph_kernel = ShortestPath(node_labels=node_labels,
node_attrs=node_attrs,

