
1. Fix bugs (tqdm progress-bar descriptions, the GED '--initialization-method' option string, the best-median comparison, and stale attribute references in the preimage module).
2. Update README.md.
3. Update the preimage module and the Dataset class.
4. Update requirements.
5. Add a helper function to compute the Gram matrix for each class.
v0.2.x
jajupmochi 5 years ago
parent commit 5dae69b878
12 changed files with 323 additions and 95 deletions

1. README.md (+1, -1)
2. gklearn/ged/median/median_graph_estimator.py (+4, -4)
3. gklearn/ged/util/util.py (+1, -1)
4. gklearn/kernels/graph_kernel.py (+15, -3)
5. gklearn/preimage/median_preimage_generator.py (+66, -41)
6. gklearn/preimage/preimage_generator.py (+7, -1)
7. gklearn/preimage/utils.py (+92, -35)
8. gklearn/utils/__init__.py (+2, -0)
9. gklearn/utils/dataset.py (+36, -2)
10. gklearn/utils/utils.py (+56, -0)
11. notebooks/tests/test_tqdm.py (+33, -0)
12. requirements.txt (+10, -7)

README.md (+1, -1)

@@ -4,7 +4,7 @@
 [![Documentation Status](https://readthedocs.org/projects/graphkit-learn/badge/?version=master)](https://graphkit-learn.readthedocs.io/en/master/?badge=master)
 [![PyPI version](https://badge.fury.io/py/graphkit-learn.svg)](https://badge.fury.io/py/graphkit-learn)
 
-A python package for graph kernels.
+A python package for graph kernels, graph edit distances and graph pre-image problem.
 
 ## Requirements

gklearn/ged/median/median_graph_estimator.py (+4, -4)

@@ -348,7 +348,7 @@ class MedianGraphEstimator(object):
         # Print information about current iteration.
         if self.__print_to_stdout == 2:
-            progress = tqdm(desc='\rComputing initial node maps', total=len(graph_ids), file=sys.stdout)
+            progress = tqdm(desc='Computing initial node maps', total=len(graph_ids), file=sys.stdout)
 
         # Compute node maps and sum of distances for initial median.
         self.__sum_of_distances = 0

@@ -457,7 +457,7 @@ class MedianGraphEstimator(object):
             self.__itrs[median_pos] += 1
 
             # Update the best median.
-            if self.__sum_of_distances < self.__best_init_sum_of_distances:
+            if self.__sum_of_distances < best_sum_of_distances:
                 best_sum_of_distances = self.__sum_of_distances
                 node_maps_from_best_median = self.__node_maps_from_median
                 best_median = median

@@ -588,7 +588,7 @@ class MedianGraphEstimator(object):
         # Print information about current iteration.
         if self.__print_to_stdout == 2:
-            progress = tqdm(desc='\rComputing medoid', total=len(graph_ids), file=sys.stdout)
+            progress = tqdm(desc='Computing medoid', total=len(graph_ids), file=sys.stdout)
 
         # Compute the medoid.
         medoid_id = graph_ids[0]

@@ -718,7 +718,7 @@ class MedianGraphEstimator(object):
     def __update_node_maps(self):
         # Print information about current iteration.
         if self.__print_to_stdout == 2:
-            progress = tqdm(desc='\rUpdating node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
+            progress = tqdm(desc='Updating node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
 
         # Update the node maps.
         node_maps_were_modified = False


gklearn/ged/util/util.py (+1, -1)

@@ -307,7 +307,7 @@ def ged_options_to_string(options):
     opt_str = ' '
     for key, val in options.items():
         if key == 'initialization_method':
-            opt_str += '--initial_solutions ' + str(val) + ' '
+            opt_str += '--initialization-method ' + str(val) + ' '
         elif key == 'initialization_options':
             opt_str += '--initialization-options ' + str(val) + ' '
         elif key == 'lower_bound_method':
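For context: ged_options_to_string() turns an options dict into a GEDLIB-style command-line string, so the wrong key mapping above silently emitted the wrong flag. A minimal sketch of the fixed mapping, covering only the two keys visible in this hunk (everything else about the real function is assumed):

    def ged_options_to_string(options):
        # Minimal sketch: only the two mappings shown in the hunk above.
        opt_str = ' '
        for key, val in options.items():
            if key == 'initialization_method':
                opt_str += '--initialization-method ' + str(val) + ' '
            elif key == 'initialization_options':
                opt_str += '--initialization-options ' + str(val) + ' '
        return opt_str

    print(ged_options_to_string({'initialization_method': 'RANDOM'}))
    # -> ' --initialization-method RANDOM '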


gklearn/kernels/graph_kernel.py (+15, -3)

@@ -76,11 +76,11 @@ class GraphKernel(object):
     def compute_distance_matrix(self):
-        dis_mat = np.empty((len(self._graphs), len(self._graphs)))
         if self._gram_matrix is None:
             raise Exception('Please compute the Gram matrix before computing distance matrix.')
-        for i in range(len(self._graphs)):
-            for j in range(i, len(self._graphs)):
+        dis_mat = np.empty((len(self._gram_matrix), len(self._gram_matrix)))
+        for i in range(len(self._gram_matrix)):
+            for j in range(i, len(self._gram_matrix)):
                 dis = self._gram_matrix[i, i] + self._gram_matrix[j, j] - 2 * self._gram_matrix[i, j]
                 if dis < 0:
                     if dis > -1e-10:

@@ -184,18 +184,22 @@ class GraphKernel(object):
     def parallel(self):
         return self._parallel
+
 
     @property
     def n_jobs(self):
         return self._n_jobs
+
 
     @property
     def verbose(self):
         return self._verbose
+
 
     @property
     def normalize(self):
         return self._normalize
+
 
     @property
     def run_time(self):
         return self._run_time

@@ -205,7 +209,15 @@ class GraphKernel(object):
     def gram_matrix(self):
         return self._gram_matrix
+
+    @gram_matrix.setter
+    def gram_matrix(self, value):
+        self._gram_matrix = value
 
     @property
     def gram_matrix_unnorm(self):
         return self._gram_matrix_unnorm
+
+    @gram_matrix_unnorm.setter
+    def gram_matrix_unnorm(self, value):
+        self._gram_matrix_unnorm = value
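The loop bounds in compute_distance_matrix() now come from the Gram matrix itself, since the distance matrix needs nothing but kernel values: d(g_i, g_j) = sqrt(k(g_i, g_i) + k(g_j, g_j) - 2 k(g_i, g_j)). A self-contained sketch of that computation, assuming nothing beyond numpy:

    import numpy as np

    def distance_matrix_from_gram(gram):
        # d(i, j)^2 = k(i, i) + k(j, j) - 2 k(i, j); clip tiny negatives from rounding.
        diag = np.diag(gram)
        sq = diag[:, None] + diag[None, :] - 2 * gram
        return np.sqrt(np.maximum(sq, 0.0))

    gram = np.array([[1.0, 0.5], [0.5, 1.0]])
    print(distance_matrix_from_gram(gram))  # off-diagonal distance = 1.0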

gklearn/preimage/median_preimage_generator.py (+66, -41)

@@ -36,10 +36,9 @@ class MedianPreimageGenerator(PreimageGenerator):
         self.__time_limit_in_sec = 0
         self.__max_itrs = 100
         self.__max_itrs_without_update = 3
-        self.__epsilon_ratio = 0.01
+        self.__epsilon_residual = 0.01
+        self.__epsilon_ec = 0.1
         # values to compute.
-        self.__edit_cost_constants = []
-        self.__runtime_precompute_gm = None
         self.__runtime_optimize_ec = None
         self.__runtime_generate_preimage = None
         self.__runtime_total = None

@@ -54,7 +53,11 @@ class MedianPreimageGenerator(PreimageGenerator):
         self.__itrs = 0
         self.__converged = False
         self.__num_updates_ecc = 0
+        # values that can be set or to be computed.
+        self.__edit_cost_constants = []
+        self.__gram_matrix_unnorm = None
+        self.__runtime_precompute_gm = None
 
     def set_options(self, **kwargs):
         self._kernel_options = kwargs.get('kernel_options', {})

@@ -71,7 +74,10 @@ class MedianPreimageGenerator(PreimageGenerator):
         self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
         self.__max_itrs = kwargs.get('max_itrs', 100)
         self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
-        self.__epsilon_ratio = kwargs.get('epsilon_ratio', 0.01)
+        self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01)
+        self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
+        self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
+        self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
 
     def run(self):

@@ -81,9 +87,18 @@ class MedianPreimageGenerator(PreimageGenerator):
         start = time.time()
 
         # 1. precompute gram matrix.
-        gram_matrix, run_time = self.__graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
-        end_precompute_gm = time.time()
-        self.__runtime_precompute_gm = end_precompute_gm - start
+        if self.__gram_matrix_unnorm is None:
+            gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
+            self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
+            end_precompute_gm = time.time()
+            self.__runtime_precompute_gm = end_precompute_gm - start
+        else:
+            if self.__runtime_precompute_gm is None:
+                raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
+            self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
+            self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
+            end_precompute_gm = time.time()
+            start -= self.__runtime_precompute_gm
 
         # 2. optimize edit cost constants.
         self.__optimize_edit_cost_constants()

@@ -134,6 +149,7 @@ class MedianPreimageGenerator(PreimageGenerator):
             print('Total number of updating edit costs:', self.__num_updates_ecc)
             print('Is optimization of edit costs converged:', self.__converged)
             print('================================================================================')
+            print()
 
         # collect return values.
         # return (sod_sm, sod_gm), \

@@ -222,7 +238,7 @@ class MedianPreimageGenerator(PreimageGenerator):
     def __optimize_ecc_by_kernel_distances(self):
         # compute distances in feature space.
-        dis_k_mat, _, _, _ = self.__graph_kernel.compute_distance_matrix()
+        dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix()
         dis_k_vec = []
         for i in range(len(dis_k_mat)):
             # for j in range(i, len(dis_k_mat)):

@@ -256,7 +272,7 @@ class MedianPreimageGenerator(PreimageGenerator):
         timer = Timer(self.__time_limit_in_sec)
         while not self.__termination_criterion_met(self.__converged, timer, self.__itrs, itrs_without_update):
             if self._verbose >= 2:
-                print('\niteration', self.__itrs)
+                print('\niteration', self.__itrs + 1)
             time0 = time.time()
             # "fit" geds to distances in feature space by tuning edit costs using the Least Squares Method.
             # np.savez('results/xp_fit_method/fit_data_debug' + str(self.__itrs) + '.gm',

@@ -286,21 +302,21 @@ class MedianPreimageGenerator(PreimageGenerator):
             # check convergency.
             ec_changed = False
             for i, cost in enumerate(self.__edit_cost_constants):
-#                if cost == 0:
-#                    if edit_cost_list[-2][i] > self.__epsilon_ratio:
-#                        ec_changed = True
-#                        break
-#                elif abs(cost - edit_cost_list[-2][i]) / cost > self.__epsilon_ratio:
-#                    ec_changed = True
-#                    break
-                if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ratio:
+                if cost == 0:
+                    if edit_cost_list[-2][i] > self.__epsilon_ec:
+                        ec_changed = True
+                        break
+                elif abs(cost - edit_cost_list[-2][i]) / cost > self.__epsilon_ec:
                     ec_changed = True
                     break
+#                if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec:
+#                    ec_changed = True
+#                    break
             residual_changed = False
             if residual_list[-1] == 0:
-                if residual_list[-2] > self.__epsilon_ratio:
+                if residual_list[-2] > self.__epsilon_residual:
                     residual_changed = True
-            elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self.__epsilon_ratio:
+            elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self.__epsilon_residual:
                 residual_changed = True
             self.__converged = not (ec_changed or residual_changed)
             if self.__converged:

@@ -313,14 +329,14 @@ class MedianPreimageGenerator(PreimageGenerator):
             if self._verbose >= 2:
                 print()
                 print('-------------------------------------------------------------------------')
-                print('States of iteration', str(self.__itrs))
+                print('States of iteration', self.__itrs + 1)
                 print('-------------------------------------------------------------------------')
 #                print('Time spend:', self.__runtime_optimize_ec)
-                print('Total number of iterations for optimizing:', self.__itrs)
+                print('Total number of iterations for optimizing:', self.__itrs + 1)
                 print('Total number of updating edit costs:', self.__num_updates_ecc)
-                print('Is optimization of edit costs converged:', self.__converged)
-                print('Does edit cost changed:', ec_changed)
-                print('Does residual changed:', residual_changed)
+                print('Was optimization of edit costs converged:', self.__converged)
+                print('Did edit costs change:', ec_changed)
+                print('Did residual change:', residual_changed)
                 print('Iterations without update:', itrs_without_update)
                 print('Current edit cost constants:', self.__edit_cost_constants)
                 print('Residual list:', residual_list)

@@ -634,11 +650,11 @@ class MedianPreimageGenerator(PreimageGenerator):
     def __compute_distances_to_true_median(self):
         # compute distance in kernel space for set median.
-        kernels_to_sm, _ = self.__graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options)
-        kernel_sm, _ = self.__graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options)
-        kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__graph_kernel.gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
+        kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options)
+        kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options)
+        kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
         # @todo: not correct kernel value
-        gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self.__graph_kernel.gram_matrix)), axis=0)
+        gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
         gram_with_sm = np.concatenate((np.array([[1] + kernels_to_sm]).T, gram_with_sm), axis=1)
         self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
                                                 [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),

@@ -649,10 +665,10 @@ class MedianPreimageGenerator(PreimageGenerator):
 #        print(set_median.edges(data=True))
 
         # compute distance in kernel space for generalized median.
-        kernels_to_gm, _ = self.__graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options)
-        kernel_gm, _ = self.__graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options)
-        kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__graph_kernel.gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
-        gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self.__graph_kernel.gram_matrix)), axis=0)
+        kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options)
+        kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options)
+        kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
+        gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
         gram_with_gm = np.concatenate((np.array([[1] + kernels_to_gm]).T, gram_with_gm), axis=1)
         self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
                                                 [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),

@@ -679,12 +695,12 @@ class MedianPreimageGenerator(PreimageGenerator):
     def __set_graph_kernel_by_name(self):
         if self.kernel_options['name'] == 'structuralspkernel':
             from gklearn.kernels import StructuralSP
-            self.__graph_kernel = StructuralSP(node_labels=self.dataset.node_labels,
-                                               edge_labels=self.dataset.edge_labels,
-                                               node_attrs=self.dataset.node_attrs,
-                                               edge_attrs=self.dataset.edge_attrs,
-                                               ds_infos=self.dataset.get_dataset_infos(keys=['directed']),
-                                               **self.kernel_options)
+            self._graph_kernel = StructuralSP(node_labels=self._dataset.node_labels,
+                                              edge_labels=self._dataset.edge_labels,
+                                              node_attrs=self._dataset.node_attrs,
+                                              edge_attrs=self._dataset.edge_attrs,
+                                              ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
+                                              **self._kernel_options)
 
 #    def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):

@@ -692,7 +708,7 @@ class MedianPreimageGenerator(PreimageGenerator):
         """
         Cleans node and edge labels and attributes of the given graph.
         """
-        G_new = nx.Graph()
+        G_new = nx.Graph(**G.graph)
         for nd, attrs in G.nodes(data=True):
             G_new.add_node(str(nd)) # @todo: should we keep this as str()?
             for l_name in self._dataset.node_labels:

@@ -760,4 +776,13 @@ class MedianPreimageGenerator(PreimageGenerator):
     @property
     def best_from_dataset(self):
         return self.__best_from_dataset
+
+    @property
+    def gram_matrix_unnorm(self):
+        return self.__gram_matrix_unnorm
+
+    @gram_matrix_unnorm.setter
+    def gram_matrix_unnorm(self, value):
+        self.__gram_matrix_unnorm = value
+
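The convergence test above splits the old single epsilon_ratio into two thresholds: edit-cost constants are compared by relative change against epsilon_ec (default 0.1), and the least-squares residual against epsilon_residual (default 0.01). A standalone sketch of that logic with hypothetical argument names (the real method reads these values from instance state and history lists):

    def converged(edit_costs_prev, edit_costs_cur, residual_prev, residual_cur,
                  epsilon_ec=0.1, epsilon_residual=0.01):
        # Edit costs converged when every constant's relative change is within epsilon_ec.
        ec_changed = False
        for c_prev, c_cur in zip(edit_costs_prev, edit_costs_cur):
            if c_cur == 0:
                if c_prev > epsilon_ec:
                    ec_changed = True
                    break
            elif abs(c_cur - c_prev) / c_cur > epsilon_ec:
                ec_changed = True
                break
        # Residual converged when its relative change is within epsilon_residual.
        if residual_cur == 0:
            residual_changed = residual_prev > epsilon_residual
        else:
            residual_changed = abs(residual_cur - residual_prev) / residual_cur > epsilon_residual
        return not (ec_changed or residual_changed)

    print(converged([1.0, 2.0], [1.001, 2.0], 0.5, 0.5005))  # True: both within thresholds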

gklearn/preimage/preimage_generator.py (+7, -1)

@@ -5,7 +5,7 @@ Created on Thu Mar 26 18:26:36 2020
 
 @author: ljia
 """
-from gklearn.utils import Dataset
+# from gklearn.utils import Dataset
 
 
 class PreimageGenerator(object):

@@ -32,6 +32,11 @@ class PreimageGenerator(object):
     @kernel_options.setter
     def kernel_options(self, value):
         self._kernel_options = value
+
+    @property
+    def graph_kernel(self):
+        return self._graph_kernel
 
     @property

@@ -41,3 +46,4 @@ class PreimageGenerator(object):
     @verbose.setter
     def verbose(self, value):
         self._verbose = value
+


gklearn/preimage/utils.py (+92, -35)

@@ -21,21 +21,23 @@ from gklearn.kernels.treeletKernel import treeletkernel
 from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
 from gklearn.utils import Dataset
 import csv
-import matplotlib.pyplot as plt
 import networkx as nx
 
 
-def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, dir_save='', ):
+def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None):
+    import os.path
     from gklearn.preimage import MedianPreimageGenerator
     from gklearn.utils import split_dataset_by_target
     from gklearn.utils.graphfiles import saveGXL
 
     # 1. get dataset.
-    print('getting dataset...')
+    print('1. getting dataset...')
     dataset_all = Dataset()
     dataset_all.load_predefined_dataset(ds_name)
+    if not irrelevant_labels is None:
+        dataset_all.remove_labels(**irrelevant_labels)
+#    dataset_all.cut_graphs(range(0, 100))
     datasets = split_dataset_by_target(dataset_all)
-#    dataset.cut_graphs(range(0, 10))
 
     if save_results:
         # create result files.

@@ -47,7 +49,6 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_
         dis_k_sm_list = []
         dis_k_gm_list = []
         dis_k_gi_min_list = []
-        time_precompute_gm_list = []
         time_optimize_ec_list = []
         time_generate_list = []
         time_total_list = []

@@ -58,6 +59,26 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_
         nb_dis_k_sm2gm = [0, 0, 0]
         nb_dis_k_gi2sm = [0, 0, 0]
         nb_dis_k_gi2gm = [0, 0, 0]
+    dis_k_max_list = []
+    dis_k_min_list = []
+    dis_k_mean_list = []
+    if load_gm == 'auto':
+        gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
+        gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
+        if gmfile_exist:
+            gmfile = np.load(gm_fname)
+            gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list']
+            time_precompute_gm_list = gmfile['run_time_list'].tolist()
+        else:
+            gram_matrix_unnorm_list = []
+            time_precompute_gm_list = []
+    elif not load_gm:
+        gram_matrix_unnorm_list = []
+        time_precompute_gm_list = []
+    else:
+        gmfile = np.load()
+        gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list']
+        time_precompute_gm_list = gmfile['run_time_list']
 #    repeats_better_sod_sm2gm = []
 #    repeats_better_dis_k_sm2gm = []
 #    repeats_better_dis_k_gi2sm = []

@@ -65,16 +86,23 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_
     print('start generating preimage for each class of target...')
-    for dataset in datasets:
-        print('\ntarget =', dataset.targets[0], '\n')
-        num_graphs = len(dataset.graphs)
+    for idx, dataset in enumerate(datasets):
+        target = dataset.targets[0]
+        print('\ntarget =', target, '\n')
+#        if target != 1:
+#            continue
+        num_graphs = len(dataset.graphs)
         if num_graphs < 2:
             print('\nnumber of graphs = ', num_graphs, ', skip.\n')
             continue
 
         # 2. set parameters.
-        print('1. initializing mpg and setting parameters...')
+        print('2. initializing mpg and setting parameters...')
+        if load_gm:
+            if gmfile_exist:
+                mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx]
+                mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx]
         mpg = MedianPreimageGenerator()
         mpg.dataset = dataset
         mpg.set_options(**mpg_options.copy())

@@ -83,10 +111,19 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_
         mpg.mge_options = mge_options.copy()
 
         # 3. compute median preimage.
-        print('2. computing median preimage...')
+        print('3. computing median preimage...')
         mpg.run()
         results = mpg.get_results()
 
+        # 4. compute pairwise kernel distances.
+        print('4. computing pairwise kernel distances...')
+        _, dis_k_max, dis_k_min, dis_k_mean = mpg.graph_kernel.compute_distance_matrix()
+        dis_k_max_list.append(dis_k_max)
+        dis_k_min_list.append(dis_k_min)
+        dis_k_mean_list.append(dis_k_mean)
+
+        # 5. save results (and median graphs).
+        print('5. saving results (and median graphs)...')
         # write result detail.
         if save_results:
             print('writing results to files...')

@@ -99,7 +136,7 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_
             csv.writer(f_detail).writerow([ds_name, kernel_options['name'],
                 ged_options['edit_cost'], ged_options['method'],
                 ged_options['attr_distance'], mpg_options['fit_method'],
-                num_graphs, dataset.targets[0], 1,
+                num_graphs, target, 1,
                 results['sod_set_median'], results['sod_gen_median'],
                 results['k_dis_set_median'], results['k_dis_gen_median'],
                 results['k_dis_dataset'], sod_sm2gm, dis_k_sm2gm,

@@ -161,7 +198,7 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_
             csv.writer(f_summary).writerow([ds_name, kernel_options['name'],
                 ged_options['edit_cost'], ged_options['method'],
                 ged_options['attr_distance'], mpg_options['fit_method'],
-                num_graphs, dataset.targets[0],
+                num_graphs, target,
                 results['sod_set_median'], results['sod_gen_median'],
                 results['k_dis_set_median'], results['k_dis_gen_median'],
                 results['k_dis_dataset'], sod_sm2gm, dis_k_sm2gm,

@@ -175,17 +212,18 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_
         # save median graphs.
         if save_medians:
-            fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options['fit_method'] + '.k' + str(num_graphs) + '.y' + str(dataset.targets[0]) + '.repeat' + str(1)
+            print('Saving median graphs to files...')
+            fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
             saveGXL(mpg.set_median, fn_pre_sm + '.gxl', method='default',
                     node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
                     node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
-            fn_pre_gm = dir_save + 'medians/gen_median.' + mpg_options['fit_method'] + '.k' + str(num_graphs) + '.y' + str(dataset.targets[0]) + '.repeat' + str(1)
+            fn_pre_gm = dir_save + 'medians/gen_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
             saveGXL(mpg.gen_median, fn_pre_gm + '.gxl', method='default',
                     node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
                     node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
-            fn_best_dataset = dir_save + 'medians/g_best_dataset.' + mpg_options['fit_method'] + '.k' + str(num_graphs) + '.y' + str(dataset.targets[0]) + '.repeat' + str(1)
+            fn_best_dataset = dir_save + 'medians/g_best_dataset.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
             saveGXL(mpg.best_from_dataset, fn_best_dataset + '.gxl', method='default',
                     node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
                     node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
 
         # plot median graphs.

@@ -194,7 +232,9 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_
             draw_Letter_graph(mpg.set_median, fn_pre_sm)
             draw_Letter_graph(mpg.gen_median, fn_pre_gm)
             draw_Letter_graph(mpg.best_from_dataset, fn_best_dataset)
+        if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
+            gram_matrix_unnorm_list.append(mpg.gram_matrix_unnorm)
 
         # write result summary for each letter.
         if save_results:

@@ -227,6 +267,18 @@ def generate_median_preimage_by_class(ds_name, mpg_options, kernel_options, ged_
                 num_converged, num_updates_ecc_mean])
             f_summary.close()
 
+    # save total pairwise kernel distances.
+    dis_k_max = np.max(dis_k_max_list)
+    dis_k_min = np.min(dis_k_min_list)
+    dis_k_mean = np.mean(dis_k_mean_list)
+    print('The maximum pairwise distance in kernel space:', dis_k_max)
+    print('The minimum pairwise distance in kernel space:', dis_k_min)
+    print('The average pairwise distance in kernel space:', dis_k_mean)
+
+    # write Gram matrices to file.
+    if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
+        np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)
+
     print('\ncomplete.')

@@ -235,7 +287,7 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output):
     fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
     f_detail = open(dir_output + fn_output_detail, 'a')
     csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'edit cost',
-        'GED method', 'attr distance', 'fit method', 'k',
+        'GED method', 'attr distance', 'fit method', 'num graphs',
         'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
         'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
        'dis_k gi -> GM', 'edit cost constants', 'time precompute gm',

@@ -247,7 +299,7 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output):
     fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'
     f_summary = open(dir_output + fn_output_summary, 'a')
     csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'edit cost',
-        'GED method', 'attr distance', 'fit method', 'k',
+        'GED method', 'attr distance', 'fit method', 'num graphs',
        'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
        'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
        'dis_k gi -> GM', 'time precompute gm', 'time optimize ec',

@@ -263,24 +315,28 @@ def __init_output_file(ds_name, gkernel, fit_method, dir_output):
 
 def get_relations(sign):
-    if sign == -1:
-        return 'better'
-    elif sign == 0:
-        return 'same'
-    elif sign == 1:
-        return 'worse'
+	if sign == -1:
+		return 'better'
+	elif sign == 0:
+		return 'same'
+	elif sign == 1:
+		return 'worse'
 
 
 # Draw the current median.
 def draw_Letter_graph(graph, file_prefix):
-    plt.figure()
-    pos = {}
-    for n in graph.nodes:
-        pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
-    nx.draw_networkx(graph, pos)
-    plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
-#    plt.show()
-    plt.clf()
+    import matplotlib
+    matplotlib.use('agg')
+    import matplotlib.pyplot as plt
+    plt.figure()
+    pos = {}
+    for n in graph.nodes:
+        pos[n] = np.array([float(graph.nodes[n]['x']),float(graph.nodes[n]['y'])])
+    nx.draw_networkx(graph, pos)
+    plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
+#    plt.show()
+    plt.clf()
+    plt.close()
 
 
 def remove_edges(Gn):

@@ -288,6 +344,7 @@ def remove_edges(Gn):
     for G in Gn:
         for _, _, attrs in G.edges(data=True):
             attrs.clear()
+
 
 def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
     term1 = Kmatrix[idx_g, idx_g]
     term2 = 0

gklearn/utils/__init__.py (+2, -0)

@@ -17,3 +17,5 @@ __date__ = "November 2017"
 # from utils import utils
 from gklearn.utils.dataset import Dataset, split_dataset_by_target
 from gklearn.utils.timer import Timer
+from gklearn.utils.utils import get_graph_kernel_by_name
+from gklearn.utils.utils import compute_gram_matrices_by_class

gklearn/utils/dataset.py (+36, -2)

@@ -56,9 +56,10 @@ class Dataset(object):
     def load_graphs(self, graphs, targets=None):
+        # this has to be followed by set_labels().
         self.__graphs = graphs
         self.__targets = targets
-        self.set_labels_attrs()
+#        self.set_labels_attrs()
 
     def load_predefined_dataset(self, ds_name):

@@ -94,6 +95,13 @@ class Dataset(object):
         self.set_labels_attrs()
 
+    def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]):
+        self.__node_labels = node_labels
+        self.__node_attrs = node_attrs
+        self.__edge_labels = edge_labels
+        self.__edge_attrs = edge_attrs
+
     def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None):
         # @todo: remove labels which have only one possible values.

@@ -371,9 +379,34 @@ class Dataset(object):
         print(OrderedDict(sorted(infos.items(), key=lambda i: keys.index(i[0]))))
 
+    def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
+        for g in self.__graphs:
+            for nd in g.nodes():
+                for nl in node_labels:
+                    del g.nodes[nd][nl]
+                for na in node_attrs:
+                    del g.nodes[nd][na]
+            for ed in g.edges():
+                for el in edge_labels:
+                    del g.edges[ed][el]
+                for ea in edge_attrs:
+                    del g.edges[ed][ea]
+        if len(node_labels) > 0:
+            self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels]
+        if len(edge_labels) > 0:
+            self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels]
+        if len(node_attrs) > 0:
+            self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs]
+        if len(edge_attrs) > 0:
+            self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs]
+
     def cut_graphs(self, range_):
         self.__graphs = [self.__graphs[i] for i in range_]
-        self.set_labels_attrs()
+        if self.__targets is not None:
+            self.__targets = [self.__targets[i] for i in range_]
+        # @todo
+#        self.set_labels_attrs()
 
     def __get_dataset_size(self):

@@ -574,5 +607,6 @@ def split_dataset_by_target(dataset):
         sub_graphs = [graphs[i] for i in val]
         sub_dataset = Dataset()
         sub_dataset.load_graphs(sub_graphs, [key] * len(val))
+        sub_dataset.set_labels(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, edge_labels=dataset.edge_labels, edge_attrs=dataset.edge_attrs)
         datasets.append(sub_dataset)
     return datasets
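Because load_graphs() no longer calls set_labels_attrs(), the new contract is load_graphs() followed by set_labels(), exactly as split_dataset_by_target() now does. A small usage sketch (the two toy graphs and the 'atom' label are illustrative):

    import networkx as nx
    from gklearn.utils import Dataset

    g1, g2 = nx.Graph(), nx.Graph()
    g1.add_node(0, atom='C')
    g2.add_node(0, atom='O')

    dataset = Dataset()
    dataset.load_graphs([g1, g2], targets=[0, 1])
    dataset.set_labels(node_labels=['atom'])      # must follow load_graphs() now
    dataset.remove_labels(node_labels=['atom'])   # drop labels irrelevant to the kernel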

gklearn/utils/utils.py (+56, -0)

@@ -296,3 +296,59 @@ def get_edge_labels(Gn, edge_label):
     for G in Gn:
         el = el | set(nx.get_edge_attributes(G, edge_label).values())
     return el
+
+
+def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}):
+    if name == 'structuralspkernel':
+        from gklearn.kernels import StructuralSP
+        graph_kernel = StructuralSP(node_labels=node_labels, edge_labels=edge_labels,
+                                    node_attrs=node_attrs, edge_attrs=edge_attrs,
+                                    ds_infos=ds_infos, **kernel_options)
+    return graph_kernel
+
+
+def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save='', irrelevant_labels=None):
+    from gklearn.utils import Dataset, split_dataset_by_target
+
+    # 1. get dataset.
+    print('1. getting dataset...')
+    dataset_all = Dataset()
+    dataset_all.load_predefined_dataset(ds_name)
+    if not irrelevant_labels is None:
+        dataset_all.remove_labels(**irrelevant_labels)
+#    dataset_all.cut_graphs(range(0, 10))
+    datasets = split_dataset_by_target(dataset_all)
+
+    gram_matrix_unnorm_list = []
+    run_time_list = []
+    print('start generating preimage for each class of target...')
+    for idx, dataset in enumerate(datasets):
+        target = dataset.targets[0]
+        print('\ntarget =', target, '\n')
+
+        # 2. initialize graph kernel.
+        print('2. initializing graph kernel and setting parameters...')
+        graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
+                                                node_labels=dataset.node_labels,
+                                                edge_labels=dataset.edge_labels,
+                                                node_attrs=dataset.node_attrs,
+                                                edge_attrs=dataset.edge_attrs,
+                                                ds_infos=dataset.get_dataset_infos(keys=['directed']),
+                                                kernel_options=kernel_options)
+
+        # 3. compute gram matrix.
+        print('3. computing gram matrix...')
+        gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options)
+        gram_matrix_unnorm = graph_kernel.gram_matrix_unnorm
+
+        gram_matrix_unnorm_list.append(gram_matrix_unnorm)
+        run_time_list.append(run_time)
+
+    # 4. save results.
+    print()
+    print('4. saving results...')
+    if save_results:
+        np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)
+
+    print('\ncomplete.')
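A hedged sketch of how this helper feeds the load_gm='auto' path of generate_median_preimages_by_class(): it writes one unnormalized Gram matrix per target class to an .npz file whose keys match what the loader expects. The dataset name, kernel options, and output directory below are placeholders:

    import numpy as np
    from gklearn.utils import compute_gram_matrices_by_class

    kernel_options = {'name': 'structuralspkernel'}  # plus kernel-specific options as needed
    dir_save = 'outputs/'  # hypothetical directory

    # Precompute and save one unnormalized Gram matrix per target class.
    compute_gram_matrices_by_class('Letter-high', kernel_options,
                                   save_results=True, dir_save=dir_save)

    # The .npz written above is what load_gm='auto' picks up later.
    gmfile = np.load(dir_save + 'gram_matrix_unnorm.Letter-high.structuralspkernel.gm.npz')
    print(len(gmfile['gram_matrix_unnorm_list']), gmfile['run_time_list'])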

notebooks/tests/test_tqdm.py (+33, -0)

@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Apr 3 10:38:59 2020
+
+@author: ljia
+"""
+
+from tqdm import tqdm
+import sys
+
+print('start')
+
+for i in tqdm(range(10000000), file=sys.stdout):
+    x = i
+#    print(x)
+# =============================================================================
+# summary
+# terminal, IPython 7.0.1 (Spyder 4): works.
+# write to file: does not work. The progress bar splits as the progress goes.
+# Jupyter:
+# =============================================================================
+
+# for i in tqdm(range(10000000)):
+#     x = i
+#     print(x)
+# =============================================================================
+# summary
+# terminal, IPython 7.0.1 (Spyder 4): does not work. When combined with other
+# prints, the progress bar splits.
+# write to file: does not work. Cannot write the progress bar to a file.
+# Jupyter:
+# =============================================================================

requirements.txt (+10, -7)

@@ -1,7 +1,10 @@
-numpy==1.15.2
-scipy==1.1.0
-matplotlib==3.0.0
-networkx==2.2
-scikit-learn==0.20.0
-tabulate==0.8.2
-tqdm==4.26.0
+numpy>=1.15.2
+scipy>=1.1.0
+matplotlib>=3.0.0
+networkx>=2.2
+scikit-learn>=0.20.0
+tabulate>=0.8.2
+tqdm>=4.26.0
+# cvxpy # for preimage.
+# cvxopt # for preimage.
+# mosek # for preimage.
