From b63a60fe46788b86634e4f6ba09b0b3b8af406ef Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Mon, 1 Jun 2020 16:43:38 +0200
Subject: [PATCH 01/17] Add RandomPreimageGenerator introduced in Bakir's paper.

---
 gklearn/preimage/__init__.py | 1 +
 .../experiments/xp_random_preimage_generation.py | 124 +++++++++
 gklearn/preimage/median_preimage_generator.py | 30 +--
 gklearn/preimage/random_preimage_generator.py | 287 +++++++++++++++++++++
 4 files changed, 423 insertions(+), 19 deletions(-)
 create mode 100644 gklearn/preimage/experiments/xp_random_preimage_generation.py
 create mode 100644 gklearn/preimage/random_preimage_generator.py

diff --git a/gklearn/preimage/__init__.py b/gklearn/preimage/__init__.py
index 21e688e..385762e 100644
--- a/gklearn/preimage/__init__.py
+++ b/gklearn/preimage/__init__.py
@@ -12,4 +12,5 @@ __date__ = "March 2020"
from gklearn.preimage.preimage_generator import PreimageGenerator
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator
+from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator
from gklearn.preimage.kernel_knn_cv import kernel_knn_cv
diff --git a/gklearn/preimage/experiments/xp_random_preimage_generation.py b/gklearn/preimage/experiments/xp_random_preimage_generation.py
new file mode 100644
index 0000000..51f7a7b
--- /dev/null
+++ b/gklearn/preimage/experiments/xp_random_preimage_generation.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Jun 1 11:37:57 2020
+
+@author: ljia
+"""
+import multiprocessing
+import numpy as np
+import networkx as nx
+import os
+from gklearn.preimage import RandomPreimageGenerator
+from gklearn.utils import Dataset
+
+
+dir_root = '../results/xp_random_preimage_generation/'
+
+
+def xp_random_preimage_generation():
+	"""
+	Experiment similar to the one in Bakir's paper. A test to check whether the RandomPreimageGenerator class works correctly.
+
+	Returns
+	-------
+	None.
+
+	"""
+	alpha1_list = np.linspace(0, 1, 11)
+	k_dis_datasets = []
+	k_dis_preimages = []
+	preimages = []
+	bests_from_dataset = []
+	for alpha1 in alpha1_list:
+		print('alpha1 =', alpha1, ':\n')
+		# set parameters.
+		ds_name = 'MUTAG'
+		rpg_options = {'k': 5,
+					   'r_max': 10, #
+					   'l': 500,
+					   'alphas': None,
+					   'parallel': True,
+					   'verbose': 2}
+		kernel_options = {'name': 'PathUpToH',
+						  'depth': 2, #
+						  'k_func': 'MinMax', #
+						  'compute_method': 'trie',
+						  'parallel': 'imap_unordered',
+						  # 'parallel': None,
+						  'n_jobs': multiprocessing.cpu_count(),
+						  'normalize': True,
+						  'verbose': 0}
+		edge_required = True
+		irrelevant_labels = {'edge_labels': ['label_0']}
+		cut_range = None
+
+		# create/get Gram matrix.
+		dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/'
+		if not os.path.exists(dir_save):
+			os.makedirs(dir_save)
+		gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
+		gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
+		if gmfile_exist:
+			gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
+			gram_matrix_unnorm = gmfile['gram_matrix_unnorm']
+			time_precompute_gm = gmfile['run_time']
+
+		# 1. get dataset.
+		print('1. getting dataset...')
+		dataset_all = Dataset()
+		dataset_all.load_predefined_dataset(ds_name)
+		dataset_all.trim_dataset(edge_required=edge_required)
+		if irrelevant_labels is not None:
+			dataset_all.remove_labels(**irrelevant_labels)
+		if cut_range is not None:
+			dataset_all.cut_graphs(cut_range)
+
+		# add two "random" graphs.
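+		# (Note: the two handcrafted graphs below use MUTAG-style node labels;
+		# in step 2 the weights are set to alphas = [alpha1, 1 - alpha1, 0, ..., 0],
+		# so the target point phi is a combination of these two graphs only.)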
+ g1 = nx.Graph() + g1.add_nodes_from(range(0, 16), label_0='0') + g1.add_nodes_from(range(16, 25), label_0='1') + g1.add_node(25, label_0='2') + g1.add_nodes_from([26, 27], label_0='3') + g1.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (5, 0), (4, 9), (12, 3), (10, 13), (13, 14), (14, 15), (15, 8), (0, 16), (1, 17), (2, 18), (12, 19), (11, 20), (13, 21), (15, 22), (7, 23), (6, 24), (14, 25), (25, 26), (25, 27)]) + g2 = nx.Graph() + g2.add_nodes_from(range(0, 12), label_0='0') + g2.add_nodes_from(range(12, 19), label_0='1') + g2.add_nodes_from([19, 20, 21], label_0='2') + g2.add_nodes_from([22, 23], label_0='3') + g2.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 19), (19, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 20), (20, 7), (5, 0), (4, 8), (0, 12), (1, 13), (2, 14), (9, 15), (10, 16), (11, 17), (6, 18), (3, 21), (21, 22), (21, 23)]) + dataset_all.load_graphs([g1, g2] + dataset_all.graphs, targets=None) + + # 2. initialize rpg and setting parameters. + print('2. initializing rpg and setting parameters...') + nb_graphs = len(dataset_all.graphs) - 2 + rpg_options['alphas'] = [alpha1, 1 - alpha1] + [0] * nb_graphs + if gmfile_exist: + rpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm + rpg_options['runtime_precompute_gm'] = time_precompute_gm + rpg = RandomPreimageGenerator() + rpg.dataset = dataset_all + rpg.set_options(**rpg_options.copy()) + rpg.kernel_options = kernel_options.copy() + + # 3. compute preimage. + print('3. computing preimage...') + rpg.run() + results = rpg.get_results() + k_dis_datasets.append(results['k_dis_dataset']) + k_dis_preimages.append(results['k_dis_preimage']) + bests_from_dataset.append(rpg.best_from_dataset) + preimages.append(rpg.preimage) + + # 4. save results. + # write Gram matrices to file. + if not gmfile_exist: + np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=rpg.gram_matrix_unnorm, run_time=results['runtime_precompute_gm']) + + print('\ncomplete.\n') + + return k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages + + +if __name__ == '__main__': + k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages = xp_random_preimage_generation() \ No newline at end of file diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py index 9deabe0..6d3a45f 100644 --- a/gklearn/preimage/median_preimage_generator.py +++ b/gklearn/preimage/median_preimage_generator.py @@ -19,7 +19,7 @@ from gklearn.ged.median import constant_node_costs,mge_options_to_string from gklearn.gedlib import librariesImport, gedlibpy from gklearn.utils import Timer from gklearn.utils.utils import get_graph_kernel_by_name -# from gklearn.utils.dataset import Dataset + class MedianPreimageGenerator(PreimageGenerator): @@ -127,8 +127,7 @@ class MedianPreimageGenerator(PreimageGenerator): # 3. compute set median and gen median using optimized edit costs. if self._verbose >= 2: print('\nstart computing set median and gen median using optimized edit costs...\n') -# group_fnames = [Gn[g].graph['filename'] for g in group_min] - self.__generate_preimage_iam() + self.__gmg_bcu() end_generate_preimage = time.time() self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec self.__runtime_total = end_generate_preimage - start @@ -140,13 +139,7 @@ class MedianPreimageGenerator(PreimageGenerator): # 4. compute kernel distances to the true median. 
if self._verbose >= 2: print('\nstart computing distances to true median....\n') -# Gn_median = [Gn[g].copy() for g in group_min] self.__compute_distances_to_true_median() -# dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min = -# idx_dis_k_gi_min = group_min[idx_dis_k_gi_min] -# print('index min dis_k_gi:', idx_dis_k_gi_min) -# print('sod_sm:', sod_sm) -# print('sod_gm:', sod_gm) # 5. print out results. if self._verbose: @@ -169,11 +162,6 @@ class MedianPreimageGenerator(PreimageGenerator): print('Is optimization of edit costs converged:', self.__converged) print('================================================================================') print() - - # collect return values. -# return (sod_sm, sod_gm), \ -# (dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min), \ -# (time_fitting, time_generating) def get_results(self): @@ -861,7 +849,15 @@ class MedianPreimageGenerator(PreimageGenerator): print() - def __generate_preimage_iam(self): + def __gmg_bcu(self): + """ + The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG). + + Returns + ------- + None. + + """ # Set up the ged environment. ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private varible. # gedlibpy.restart_env() @@ -917,10 +913,6 @@ class MedianPreimageGenerator(PreimageGenerator): self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_sm, withterm3=False) - # print(gen_median.nodes(data=True)) - # print(gen_median.edges(data=True)) - # print(set_median.nodes(data=True)) - # print(set_median.edges(data=True)) # compute distance in kernel space for generalized median. kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) diff --git a/gklearn/preimage/random_preimage_generator.py b/gklearn/preimage/random_preimage_generator.py new file mode 100644 index 0000000..b2da2b2 --- /dev/null +++ b/gklearn/preimage/random_preimage_generator.py @@ -0,0 +1,287 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri May 29 14:29:52 2020 + +@author: ljia +""" + +import numpy as np +import time +import random +import sys +import tqdm +import multiprocessing +import networkx as nx +from gklearn.preimage import PreimageGenerator +from gklearn.preimage.utils import compute_k_dis +from gklearn.utils import Timer +from gklearn.utils.utils import get_graph_kernel_by_name +# from gklearn.utils.dataset import Dataset + +class RandomPreimageGenerator(PreimageGenerator): + + def __init__(self, dataset=None): + PreimageGenerator.__init__(self, dataset=dataset) + # arguments to set. + self.__k = 5 # number of nearest neighbors of phi in D_N. + self.__r_max = 10 # maximum number of iterations. + self.__l = 500 # numbers of graphs generated for each graph in D_k U {g_i_hat}. + self.__alphas = None # weights of linear combinations of points in kernel space. + self.__parallel = True + self.__n_jobs = multiprocessing.cpu_count() + self.__time_limit_in_sec = 0 # @todo + self.__max_itrs = 100 # @todo + # values to compute. + self.__runtime_generate_preimage = None + self.__runtime_total = None + self.__preimage = None + self.__best_from_dataset = None + self.__k_dis_preimage = None + self.__k_dis_dataset = None + self.__itrs = 0 + self.__converged = False # @todo + self.__num_updates = 0 + # values that can be set or to be computed. 
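+		# (these two are either passed in through set_options() as
+		# 'gram_matrix_unnorm' / 'runtime_precompute_gm', or computed in run()
+		# when no pre-computed Gram matrix is given.)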
+		self.__gram_matrix_unnorm = None
+		self.__runtime_precompute_gm = None
+
+
+	def set_options(self, **kwargs):
+		self._kernel_options = kwargs.get('kernel_options', {})
+		self._graph_kernel = kwargs.get('graph_kernel', None)
+		self._verbose = kwargs.get('verbose', 2)
+		self.__k = kwargs.get('k', 5)
+		self.__r_max = kwargs.get('r_max', 10)
+		self.__l = kwargs.get('l', 500)
+		self.__alphas = kwargs.get('alphas', None)
+		self.__parallel = kwargs.get('parallel', True)
+		self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
+		self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
+		self.__max_itrs = kwargs.get('max_itrs', 100)
+		self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
+		self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
+
+
+	def run(self):
+		self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'],
+						  node_labels=self._dataset.node_labels,
+						  edge_labels=self._dataset.edge_labels,
+						  node_attrs=self._dataset.node_attrs,
+						  edge_attrs=self._dataset.edge_attrs,
+						  ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
+						  kernel_options=self._kernel_options)
+
+		# record start time.
+		start = time.time()
+
+		# 1. precompute gram matrix.
+		if self.__gram_matrix_unnorm is None:
+			gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
+			self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
+			end_precompute_gm = time.time()
+			self.__runtime_precompute_gm = end_precompute_gm - start
+		else:
+			if self.__runtime_precompute_gm is None:
+				raise Exception('Parameter "runtime_precompute_gm" must be given when using a pre-computed Gram matrix.')
+			self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
+			if self._kernel_options['normalize']:
+				self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
+			else:
+				self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
+			end_precompute_gm = time.time()
+			start -= self.__runtime_precompute_gm
+
+		# 2. compute k nearest neighbors of phi in D_N.
+		if self._verbose >= 2:
+			print('\nstart computing k nearest neighbors of phi in D_N...\n')
+		D_N = self._dataset.graphs
+		if self.__alphas is None:
+			self.__alphas = [1 / len(D_N)] * len(D_N)
+		k_dis_list = [] # distance between g_star and each graph.
+		term3 = 0
+		for i1, a1 in enumerate(self.__alphas):
+			for i2, a2 in enumerate(self.__alphas):
+				term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2]
+		for idx in range(len(D_N)):
+			k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True))
+
+		# sort.
+		sort_idx = np.argsort(k_dis_list)
+		dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]] # the k shortest distances.
+		nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
+		g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N.
+		self.__best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple.
+		self.__k_dis_dataset = dis_gs[0]
+
+		if self.__k_dis_dataset == 0: # get the exact pre-image.
+			end_generate_preimage = time.time()
+			self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
+			self.__runtime_total = end_generate_preimage - start
+			self.__preimage = self.__best_from_dataset.copy()
+			self.__k_dis_preimage = self.__k_dis_dataset
+			if self._verbose:
+				print()
+				print('=============================================================================')
+				print('The exact pre-image is found from the input dataset.')
+				print('-----------------------------------------------------------------------------')
+				print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset)
+				print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
+				print('Time to generate pre-images:', self.__runtime_generate_preimage)
+				print('Total time:', self.__runtime_total)
+				print('=============================================================================')
+				print()
+			return
+
+		dhat = dis_gs[0] # the nearest distance.
+		Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]] # the k nearest neighbors.
+		Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk]
+
+		# 3. start iterations.
+		if self._verbose >= 2:
+			print('starting iterations...')
+		gihat_list = []
+		dihat_list = []
+		r = 0
+		dis_of_each_itr = [dhat]
+		while r < self.__r_max:
+			print('\n- r =', r)
+			found = False
+			dis_bests = dis_gs + dihat_list
+
+			# compute numbers of edges to be inserted/deleted.
+			# @todo: what if the log is negative? how to choose alpha (scalar)?
+			fdgs_list = np.array(dis_bests)
+			if np.min(fdgs_list) < 1:
+				fdgs_list /= np.min(dis_bests)
+			fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
+			if np.min(fdgs_list) < 1:
+				fdgs_list = np.array(fdgs_list) + 1
+
+			for ig, gs in enumerate(Gs_nearest + gihat_list):
+				if self._verbose >= 2:
+					print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list))
+				for trial in range(0, self.__l):
+					if self._verbose >= 2:
+						print('---', trial + 1, 'trial out of', self.__l)
+
+					# add and delete edges.
+					gtemp = gs.copy()
+					np.random.seed() # @todo: may not work for possible parallel.
+					# which edges to change.
+					# @todo: should we use just half of the adjacency matrix for undirected graphs?
+					nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
+					# @todo: what if fdgs is bigger than nb_vpairs?
+					idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if fdgs_list[ig] < nb_vpairs else nb_vpairs)
+					for item in idx_change:
+						node1 = int(item / (nx.number_of_nodes(gs) - 1))
+						node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
+						if node2 >= node1: # skip the self pair.
+							node2 += 1
+						# @todo: is the randomness correct?
+						if not gtemp.has_edge(node1, node2):
+							gtemp.add_edge(node1, node2)
+						else:
+							gtemp.remove_edge(node1, node2)
+
+					# compute new distances.
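+					# (a note on what compute_k_dis evaluates below: the kernel-space
+					# distance between gtemp and g_star = sum_i alphas[i] * phi(g_i),
+					#   d(gtemp, g_star) = sqrt(k(gtemp, gtemp) - 2 * sum_i alphas[i] * k(gtemp, g_i) + term3),
+					# where term3 = sum_{i,j} alphas[i] * alphas[j] * k(g_i, g_j) is
+					# precomputed in step 2 since it does not depend on gtemp; the kernel
+					# values are normalized first, so k(gtemp, gtemp) = 1 in gram_with_gtmp.)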
+					kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, D_N, **self._kernel_options)
+					kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
+					kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize
+					# @todo: not correct kernel value
+					gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
+					gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
+					dnew = compute_k_dis(0, range(1, 1 + len(D_N)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)
+
+					# get the better graph preimage.
+					if dnew <= dhat: # @todo: the new distance is smaller or also equal?
+						if dnew < dhat:
+							if self._verbose >= 2:
+								print('trial =', str(trial))
+								print('\nI am smaller!')
+								print('index (as in D_k U {gihat}) =', str(ig))
+								print('distance:', dhat, '->', dnew)
+							self.__num_updates += 1
+						elif dnew == dhat:
+							if self._verbose >= 2:
+								print('I am equal!')
+						dhat = dnew
+						gnew = gtemp.copy()
+						found = True # found better graph.
+
+			if found:
+				r = 0
+				gihat_list = [gnew]
+				dihat_list = [dhat]
+			else:
+				r += 1
+
+			dis_of_each_itr.append(dhat)
+			self.__itrs += 1
+			if self._verbose >= 2:
+				print('Total number of iterations is', self.__itrs)
+				print('The preimage is updated', self.__num_updates, 'times.')
+				print('The shortest distances for previous iterations are', dis_of_each_itr)
+
+
+		# get results and print.
+		end_generate_preimage = time.time()
+		self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
+		self.__runtime_total = end_generate_preimage - start
+		self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
+		self.__k_dis_preimage = dhat
+		if self._verbose:
+			print()
+			print('=============================================================================')
+			print('Finished generation of preimages.')
+			print('-----------------------------------------------------------------------------')
+			print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset)
+			print('Distance in kernel space for the preimage:', self.__k_dis_preimage)
+			print('Total number of iterations for optimizing:', self.__itrs)
+			print('Total number of preimage updates:', self.__num_updates)
+			print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
+			print('Time to generate pre-images:', self.__runtime_generate_preimage)
+			print('Total time:', self.__runtime_total)
+			print('=============================================================================')
+			print()
+
+
+	def get_results(self):
+		results = {}
+		results['runtime_precompute_gm'] = self.__runtime_precompute_gm
+		results['runtime_generate_preimage'] = self.__runtime_generate_preimage
+		results['runtime_total'] = self.__runtime_total
+		results['k_dis_dataset'] = self.__k_dis_dataset
+		results['k_dis_preimage'] = self.__k_dis_preimage
+		results['itrs'] = self.__itrs
+		results['num_updates'] = self.__num_updates
+		return results
+
+
+	def __termination_criterion_met(self, converged, timer, itr, itrs_without_update):
+		# @todo: this method is not called yet; self.__max_itrs_without_update is never initialized in this class.
+		if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
+#			if self.__state == AlgorithmState.TERMINATED:
+#				self.__state = AlgorithmState.INITIALIZED
+			return True
+		return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)
+
+
+	@property
+	def preimage(self):
+		return
self.__preimage + + + @property + def best_from_dataset(self): + return self.__best_from_dataset + + + @property + def gram_matrix_unnorm(self): + return self.__gram_matrix_unnorm + + @gram_matrix_unnorm.setter + def gram_matrix_unnorm(self, value): + self.__gram_matrix_unnorm = value \ No newline at end of file From 50da423a1c4ee6597767e1fa860b095679b0100c Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Mon, 1 Jun 2020 18:13:43 +0200 Subject: [PATCH 02/17] Add experiment of random preimage. --- gklearn/preimage/__init__.py | 1 + gklearn/preimage/experiments/xp_random_preimage.py | 1192 ++++++++++++++++++++ .../preimage/generate_random_preimages_by_class.py | 188 +++ gklearn/preimage/utils.py | 2 +- 4 files changed, 1382 insertions(+), 1 deletion(-) create mode 100644 gklearn/preimage/experiments/xp_random_preimage.py create mode 100644 gklearn/preimage/generate_random_preimages_by_class.py diff --git a/gklearn/preimage/__init__.py b/gklearn/preimage/__init__.py index 385762e..7972820 100644 --- a/gklearn/preimage/__init__.py +++ b/gklearn/preimage/__init__.py @@ -14,3 +14,4 @@ from gklearn.preimage.preimage_generator import PreimageGenerator from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator from gklearn.preimage.kernel_knn_cv import kernel_knn_cv +from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class diff --git a/gklearn/preimage/experiments/xp_random_preimage.py b/gklearn/preimage/experiments/xp_random_preimage.py new file mode 100644 index 0000000..8700ad8 --- /dev/null +++ b/gklearn/preimage/experiments/xp_random_preimage.py @@ -0,0 +1,1192 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Jan 14 15:39:29 2020 + +@author: ljia +""" +import multiprocessing +import functools +import sys +import os +import logging +from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct +from gklearn.preimage import generate_random_preimages_by_class +from gklearn.utils import compute_gram_matrices_by_class + + +dir_root = '../results/xp_random_preimage/' + + +def xp_median_preimage_15_1(): + """xp 15_1: AIDS, StructuralSP, using CONSTANT, symbolic only. + """ + # set parameters. + ds_name = 'AIDS' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) + sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} + kernel_options = {'name': 'StructuralSP', + 'edge_weight': None, + 'node_kernels': sub_kernels, + 'edge_kernels': sub_kernels, + 'compute_method': 'naive', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.symb/' + irrelevant_labels = {'node_attrs': ['chem', 'charge', 'x', 'y'], 'edge_labels': ['valence']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. 
+ try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_15_2(): + """xp 15_2: AIDS, PathUpToH, using CONSTANT, symbolic only. + """ + # set parameters. + ds_name = 'AIDS' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'PathUpToH', + 'depth': 1, # + 'k_func': 'MinMax', # + 'compute_method': 'trie', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.symb/' + irrelevant_labels = {'node_attrs': ['chem', 'charge', 'x', 'y'], 'edge_labels': ['valence']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_15_3(): + """xp 15_3: AIDS, Treelet, using CONSTANT, symbolic only. + """ + from gklearn.utils.kernels import polynomialkernel + # set parameters. + ds_name = 'AIDS' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + pkernel = functools.partial(polynomialkernel, d=1, c=1e+2) + kernel_options = {'name': 'Treelet', # + 'sub_kernel': pkernel, + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.symb/' + irrelevant_labels = {'node_attrs': ['chem', 'charge', 'x', 'y'], 'edge_labels': ['valence']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. 
+ try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_15_4(): + """xp 15_4: AIDS, WeisfeilerLehman, using CONSTANT, symbolic only. + """ + # set parameters. + ds_name = 'AIDS' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'WeisfeilerLehman', + 'height': 10, + 'base_kernel': 'subtree', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.symb/' + irrelevant_labels = {'node_attrs': ['chem', 'charge', 'x', 'y'], 'edge_labels': ['valence']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + +# # compute gram matrices for each class a priori. +# print('Compute gram matrices for each class a priori.') +# compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save=dir_save, irrelevant_labels=irrelevant_labels) + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_14_1(): + """xp 14_1: DD, PathUpToH, using CONSTANT. + """ + # set parameters. + ds_name = 'DD' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'PathUpToH', + 'depth': 2, # + 'k_func': 'MinMax', # + 'compute_method': 'trie', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = None # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + +# # compute gram matrices for each class a priori. +# print('Compute gram matrices for each class a priori.') +# compute_gram_matrices_by_class(ds_name, kernel_options, save_results=save_results, dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + + # print settings. 
+ print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_12_1(): + """xp 12_1: PAH, StructuralSP, using NON_SYMBOLIC, unlabeled. + """ + # set parameters. + ds_name = 'PAH' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) + sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} + kernel_options = {'name': 'StructuralSP', + 'edge_weight': None, + 'node_kernels': sub_kernels, + 'edge_kernels': sub_kernels, + 'compute_method': 'naive', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.unlabeled/' + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_12_2(): + """xp 12_2: PAH, PathUpToH, using CONSTANT, unlabeled. + """ + # set parameters. + ds_name = 'PAH' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'PathUpToH', + 'depth': 1, # + 'k_func': 'MinMax', # + 'compute_method': 'trie', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.unlabeled/' + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. 
+ try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_12_3(): + """xp 12_3: PAH, Treelet, using CONSTANT, unlabeled. + """ + from gklearn.utils.kernels import gaussiankernel + # set parameters. + ds_name = 'PAH' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + pkernel = functools.partial(gaussiankernel, gamma=None) # @todo + kernel_options = {'name': 'Treelet', # + 'sub_kernel': pkernel, + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.unlabeled/' + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_12_4(): + """xp 12_4: PAH, WeisfeilerLehman, using CONSTANT, unlabeled. + """ + # set parameters. + ds_name = 'PAH' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'WeisfeilerLehman', + 'height': 14, + 'base_kernel': 'subtree', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.unlabeled/' + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + +# # compute gram matrices for each class a priori. +# print('Compute gram matrices for each class a priori.') +# compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save=dir_save, irrelevant_labels=irrelevant_labels) + + # generate preimages. 
+ try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_12_5(): + """xp 12_5: PAH, ShortestPath, using NON_SYMBOLIC, unlabeled. + """ + # set parameters. + ds_name = 'PAH' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) + sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} + kernel_options = {'name': 'ShortestPath', + 'edge_weight': None, + 'node_kernels': sub_kernels, + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.unlabeled/' # + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} # + edge_required = True # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_9_1(): + """xp 9_1: MAO, StructuralSP, using CONSTANT, symbolic only. + """ + # set parameters. + ds_name = 'MAO' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) + sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} + kernel_options = {'name': 'StructuralSP', + 'edge_weight': None, + 'node_kernels': sub_kernels, + 'edge_kernels': sub_kernels, + 'compute_method': 'naive', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.symb/' + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_type', 'bond_stereo']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. 
+ try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_9_2(): + """xp 9_2: MAO, PathUpToH, using CONSTANT, symbolic only. + """ + # set parameters. + ds_name = 'MAO' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'PathUpToH', + 'depth': 9, # + 'k_func': 'MinMax', # + 'compute_method': 'trie', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.symb/' + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_type', 'bond_stereo']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_9_3(): + """xp 9_3: MAO, Treelet, using CONSTANT, symbolic only. + """ + from gklearn.utils.kernels import polynomialkernel + # set parameters. + ds_name = 'MAO' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + pkernel = functools.partial(polynomialkernel, d=4, c=1e+7) + kernel_options = {'name': 'Treelet', # + 'sub_kernel': pkernel, + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.symb/' + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_type', 'bond_stereo']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. 
+ try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_9_4(): + """xp 9_4: MAO, WeisfeilerLehman, using CONSTANT, symbolic only. + """ + # set parameters. + ds_name = 'MAO' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'WeisfeilerLehman', + 'height': 6, + 'base_kernel': 'subtree', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '.symb/' + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_type', 'bond_stereo']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + +# # compute gram matrices for each class a priori. +# print('Compute gram matrices for each class a priori.') +# compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save=dir_save, irrelevant_labels=irrelevant_labels) + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_8_1(): + """xp 8_1: Monoterpenoides, StructuralSP, using CONSTANT. + """ + # set parameters. + ds_name = 'Monoterpenoides' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) + sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} + kernel_options = {'name': 'StructuralSP', + 'edge_weight': None, + 'node_kernels': sub_kernels, + 'edge_kernels': sub_kernels, + 'compute_method': 'naive', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = {'edge_labels': ['valence']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. 
+ try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_8_2(): + """xp 8_2: Monoterpenoides, PathUpToH, using CONSTANT. + """ + # set parameters. + ds_name = 'Monoterpenoides' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'PathUpToH', + 'depth': 7, # + 'k_func': 'MinMax', # + 'compute_method': 'trie', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = {'edge_labels': ['valence']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_8_3(): + """xp 8_3: Monoterpenoides, Treelet, using CONSTANT. + """ + from gklearn.utils.kernels import polynomialkernel + # set parameters. + ds_name = 'Monoterpenoides' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 0} + pkernel = functools.partial(polynomialkernel, d=2, c=1e+5) + kernel_options = {'name': 'Treelet', + 'sub_kernel': pkernel, + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = {'edge_labels': ['valence']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. 
+ try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_8_4(): + """xp 8_4: Monoterpenoides, WeisfeilerLehman, using CONSTANT. + """ + # set parameters. + ds_name = 'Monoterpenoides' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'WeisfeilerLehman', + 'height': 4, + 'base_kernel': 'subtree', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = {'edge_labels': ['valence']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_7_1(): + """xp 7_1: MUTAG, StructuralSP, using CONSTANT. + """ + # set parameters. + ds_name = 'MUTAG' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) + sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} + kernel_options = {'name': 'StructuralSP', + 'edge_weight': None, + 'node_kernels': sub_kernels, + 'edge_kernels': sub_kernels, + 'compute_method': 'naive', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = {'edge_labels': ['label_0']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. 
+ try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_7_2(): + """xp 7_2: MUTAG, PathUpToH, using CONSTANT. + """ + # set parameters. + ds_name = 'MUTAG' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'PathUpToH', + 'depth': 2, # + 'k_func': 'MinMax', # + 'compute_method': 'trie', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = {'edge_labels': ['label_0']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=None) + except Exception as exp: + print('An exception occured when running experiment on xp_median_preimage_7_2:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_7_3(): + """xp 7_3: MUTAG, Treelet, using CONSTANT. + """ + from gklearn.utils.kernels import polynomialkernel + # set parameters. + ds_name = 'MUTAG' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + pkernel = functools.partial(polynomialkernel, d=3, c=1e+8) + kernel_options = {'name': 'Treelet', + 'sub_kernel': pkernel, + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = {'edge_labels': ['label_0']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. 
+ try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +def xp_median_preimage_7_4(): + """xp 7_4: MUTAG, WeisfeilerLehman, using CONSTANT. + """ + # set parameters. + ds_name = 'MUTAG' # + rpg_options = {'k': 5, + 'r_max': 10, # + 'l': 500, + 'alphas': None, + 'parallel': True, + 'verbose': 2} + kernel_options = {'name': 'WeisfeilerLehman', + 'height': 1, + 'base_kernel': 'subtree', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + save_results = True + dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/' + irrelevant_labels = {'edge_labels': ['label_0']} # + edge_required = False # + + if not os.path.exists(dir_save): + os.makedirs(dir_save) + file_output = open(dir_save + 'output.txt', 'a') + sys.stdout = file_output + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('kernel_options:', kernel_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + try: + generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=save_results, save_preimages=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = dir_save + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception('') + print(repr(exp)) + + +if __name__ == "__main__": + +# #### xp 7_2: MUTAG, PathUpToH, using CONSTANT. + xp_median_preimage_7_2() + +# #### xp 7_3: MUTAG, Treelet, using CONSTANT. + xp_median_preimage_7_3() + +# #### xp 7_4: MUTAG, WeisfeilerLehman, using CONSTANT. + xp_median_preimage_7_4() +# +# #### xp 7_1: MUTAG, StructuralSP, using CONSTANT. + xp_median_preimage_7_1() + +# #### xp 8_2: Monoterpenoides, PathUpToH, using CONSTANT. + xp_median_preimage_8_2() + +# #### xp 8_3: Monoterpenoides, Treelet, using CONSTANT. + xp_median_preimage_8_3() + +# #### xp 8_4: Monoterpenoides, WeisfeilerLehman, using CONSTANT. + xp_median_preimage_8_4() + +# #### xp 8_1: Monoterpenoides, StructuralSP, using CONSTANT. + xp_median_preimage_8_1() + +# #### xp 9_2: MAO, PathUpToH, using CONSTANT, symbolic only. + xp_median_preimage_9_2() + +# #### xp 9_3: MAO, Treelet, using CONSTANT, symbolic only. + xp_median_preimage_9_3() + +# #### xp 9_4: MAO, WeisfeilerLehman, using CONSTANT, symbolic only. + xp_median_preimage_9_4() + +# #### xp 9_1: MAO, StructuralSP, using CONSTANT, symbolic only. + xp_median_preimage_9_1() + + #### xp 12_1: PAH, StructuralSP, using NON_SYMBOLIC, unlabeled. + xp_median_preimage_12_1() + + #### xp 12_2: PAH, PathUpToH, using CONSTANT, unlabeled. + xp_median_preimage_12_2() + + #### xp 12_3: PAH, Treelet, using CONSTANT, unlabeled. + xp_median_preimage_12_3() + + #### xp 12_4: PAH, WeisfeilerLehman, using CONSTANT, unlabeled. + xp_median_preimage_12_4() + + #### xp 12_5: PAH, ShortestPath, using NON_SYMBOLIC, unlabeled. 
+ xp_median_preimage_12_5() + + # #### xp 15_1: AIDS, StructuralSP, using CONSTANT, symbolic only. + xp_median_preimage_15_1() + +# #### xp 15_2: AIDS, PathUpToH, using CONSTANT, symbolic only. + xp_median_preimage_15_2() + +# #### xp 15_3: AIDS, Treelet, using CONSTANT, symbolic only. + xp_median_preimage_15_3() + +# #### xp 15_4: AIDS, WeisfeilerLehman, using CONSTANT, symbolic only. + xp_median_preimage_15_4() +# + #### xp 14_1: DD, PathUpToH, using CONSTANT. + xp_median_preimage_14_1() diff --git a/gklearn/preimage/generate_random_preimages_by_class.py b/gklearn/preimage/generate_random_preimages_by_class.py new file mode 100644 index 0000000..656579f --- /dev/null +++ b/gklearn/preimage/generate_random_preimages_by_class.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Jun 1 17:02:51 2020 + +@author: ljia +""" + +import numpy as np +from gklearn.utils import Dataset +import csv +import os +import os.path +from gklearn.preimage import RandomPreimageGenerator +from gklearn.utils import split_dataset_by_target +from gklearn.utils.graphfiles import saveGXL + + +def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, save_results=True, save_preimages=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False, cut_range=None): + # 1. get dataset. + print('1. getting dataset...') + dataset_all = Dataset() + dataset_all.load_predefined_dataset(ds_name) + dataset_all.trim_dataset(edge_required=edge_required) + if irrelevant_labels is not None: + dataset_all.remove_labels(**irrelevant_labels) + if cut_range is not None: + dataset_all.cut_graphs(cut_range) + datasets = split_dataset_by_target(dataset_all) + + if save_results: + # create result files. + print('creating output files...') + fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], dir_save) + + + dis_k_dataset_list = [] + dis_k_preimage_list = [] + time_precompute_gm_list = [] + time_generate_list = [] + time_total_list = [] + itrs_list = [] + num_updates_list = [] + if load_gm == 'auto': + gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' + gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) + if gmfile_exist: + gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe. + gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']] + time_precompute_gm_list = gmfile['run_time_list'].tolist() + else: + gram_matrix_unnorm_list = [] + time_precompute_gm_list = [] + elif not load_gm: + gram_matrix_unnorm_list = [] + time_precompute_gm_list = [] + else: + gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' + gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe. + gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']] + time_precompute_gm_list = gmfile['run_time_list'].tolist() + + print('starting generating preimage for each class of target...') + idx_offset = 0 + for idx, dataset in enumerate(datasets): + target = dataset.targets[0] + print('\ntarget =', target, '\n') +# if target != 1: +# continue + + num_graphs = len(dataset.graphs) + if num_graphs < 2: + print('\nnumber of graphs = ', num_graphs, ', skip.\n') + idx_offset += 1 + continue + + # 2. set parameters. + print('2. 
initializing rpg and setting parameters...')
+		if load_gm:
+			if gmfile_exist:
+				rpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx - idx_offset]
+				rpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx - idx_offset]
+		rpg = RandomPreimageGenerator()
+		rpg.dataset = dataset
+		rpg.set_options(**rpg_options.copy())
+		rpg.kernel_options = kernel_options.copy()
+
+		# 3. compute preimage.
+		print('3. computing preimage...')
+		rpg.run()
+		results = rpg.get_results()
+
+		# 4. save results (and preimages).
+		print('4. saving results (and preimages)...')
+		# write result detail.
+		if save_results:
+			print('writing results to files...')
+
+			f_detail = open(dir_save + fn_output_detail, 'a')
+			csv.writer(f_detail).writerow([ds_name, kernel_options['name'],
+					  num_graphs, target, 1,
+					  results['k_dis_dataset'], results['k_dis_preimage'],
+					  results['runtime_precompute_gm'],
+					  results['runtime_generate_preimage'], results['runtime_total'],
+					  results['itrs'], results['num_updates']])
+			f_detail.close()
+
+			# compute result summary.
+			dis_k_dataset_list.append(results['k_dis_dataset'])
+			dis_k_preimage_list.append(results['k_dis_preimage'])
+			time_precompute_gm_list.append(results['runtime_precompute_gm'])
+			time_generate_list.append(results['runtime_generate_preimage'])
+			time_total_list.append(results['runtime_total'])
+			itrs_list.append(results['itrs'])
+			num_updates_list.append(results['num_updates'])
+
+			# write result summary for each target.
+			f_summary = open(dir_save + fn_output_summary, 'a')
+			csv.writer(f_summary).writerow([ds_name, kernel_options['name'],
+					  num_graphs, target,
+					  results['k_dis_dataset'], results['k_dis_preimage'],
+					  results['runtime_precompute_gm'],
+					  results['runtime_generate_preimage'], results['runtime_total'],
+					  results['itrs'], results['num_updates']])
+			f_summary.close()
+
+		# save preimages.
+		if save_preimages:
+			if not os.path.exists(dir_save + 'preimages/'):
+				os.makedirs(dir_save + 'preimages/')
+			print('Saving preimages to files...')
+			fn_best_dataset = dir_save + 'preimages/g_best_dataset.' + 'nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
+			saveGXL(rpg.best_from_dataset, fn_best_dataset + '.gxl', method='default',
+					node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
+					node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
+			fn_preimage = dir_save + 'preimages/g_preimage.' + 'nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
+			saveGXL(rpg.preimage, fn_preimage + '.gxl', method='default',
+					node_labels=dataset.node_labels, edge_labels=dataset.edge_labels,
+					node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs)
+
+		if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
+			gram_matrix_unnorm_list.append(rpg.gram_matrix_unnorm)
+
+	# write result summary for each class.
+	if save_results:
+		dis_k_dataset_mean = np.mean(dis_k_dataset_list)
+		dis_k_preimage_mean = np.mean(dis_k_preimage_list)
+		time_precompute_gm_mean = np.mean(time_precompute_gm_list)
+		time_generate_mean = np.mean(time_generate_list)
+		time_total_mean = np.mean(time_total_list)
+		itrs_mean = np.mean(itrs_list)
+		num_updates_mean = np.mean(num_updates_list)
+		f_summary = open(dir_save + fn_output_summary, 'a')
+		csv.writer(f_summary).writerow([ds_name, kernel_options['name'],
+				num_graphs, 'all',
+				dis_k_dataset_mean, dis_k_preimage_mean,
+				time_precompute_gm_mean,
+				time_generate_mean, time_total_mean, itrs_mean,
+				num_updates_mean])
+		f_summary.close()
+
+	# write Gram matrices to file.
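+	# Note: the keys written below ('gram_matrix_unnorm_list', 'run_time_list')
+	# must match the ones read back by the load_gm branch at the top of this
+	# function; otherwise the cached Gram matrices cannot be reused.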
+ if (load_gm == 'auto' and not gmfile_exist) or not load_gm: + np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) + + print('\ncomplete.\n') + + +def __init_output_file_preimage(ds_name, gkernel, dir_output): + if not os.path.exists(dir_output): + os.makedirs(dir_output) + fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' + f_detail = open(dir_output + fn_output_detail, 'a') + csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'num graphs', + 'target', 'repeat', 'dis_k best from dataset', 'dis_k preimage', + 'time precompute gm', 'time generate preimage', 'time total', + 'itrs', 'num updates']) + f_detail.close() + + fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv' + f_summary = open(dir_output + fn_output_summary, 'a') + csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'num graphs', + 'target', 'dis_k best from dataset', 'dis_k preimage', + 'time precompute gm', 'time generate preimage', 'time total', + 'itrs', 'num updates']) + f_summary.close() + + return fn_output_detail, fn_output_summary \ No newline at end of file diff --git a/gklearn/preimage/utils.py b/gklearn/preimage/utils.py index 5ca0c1e..6bc10c7 100644 --- a/gklearn/preimage/utils.py +++ b/gklearn/preimage/utils.py @@ -256,7 +256,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged if (load_gm == 'auto' and not gmfile_exist) or not load_gm: gram_matrix_unnorm_list.append(mpg.gram_matrix_unnorm) - # write result summary for each letter. + # write result summary for each class. if save_results: sod_sm_mean = np.mean(sod_sm_list) sod_gm_mean = np.mean(sod_gm_list) From 980dd1c9cf857f2808c69c2a6383abc1da8e48f8 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Tue, 2 Jun 2020 11:34:57 +0200 Subject: [PATCH 03/17] Add parallel to RPG. --- gklearn/preimage/random_preimage_generator.py | 205 +++++++++++++++++++------- 1 file changed, 151 insertions(+), 54 deletions(-) diff --git a/gklearn/preimage/random_preimage_generator.py b/gklearn/preimage/random_preimage_generator.py index b2da2b2..f3dfb48 100644 --- a/gklearn/preimage/random_preimage_generator.py +++ b/gklearn/preimage/random_preimage_generator.py @@ -10,9 +10,11 @@ import numpy as np import time import random import sys -import tqdm +from tqdm import tqdm import multiprocessing import networkx as nx +from multiprocessing import Pool +from functools import partial from gklearn.preimage import PreimageGenerator from gklearn.preimage.utils import compute_k_dis from gklearn.utils import Timer @@ -144,12 +146,14 @@ class RandomPreimageGenerator(PreimageGenerator): dihat_list = [] r = 0 dis_of_each_itr = [dhat] + if self.__parallel: + self._kernel_options['parallel'] = None while r < self.__r_max: print('\n- r =', r) found = False dis_bests = dis_gs + dihat_list - # compute numbers of nodes to be inserted/deleted. + # compute numbers of edges to be inserted/deleted. # @todo what if the log is negetive? how to choose alpha (scalar)? fdgs_list = np.array(dis_bests) if np.min(fdgs_list) < 1: @@ -161,54 +165,7 @@ class RandomPreimageGenerator(PreimageGenerator): for ig, gs in enumerate(Gs_nearest + gihat_list): if self._verbose >= 2: print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) - for trail in range(0, self.__l): - if self._verbose >= 2: - print('---', trail + 1, 'trail out of', self.__l) - - # add and delete edges. 
- gtemp = gs.copy() - np.random.seed() # @todo: may not work for possible parallel. - # which edges to change. - # @todo: should we use just half of the adjacency matrix for undirected graphs? - nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1) - # @todo: what if fdgs is bigger than nb_vpairs? - idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if - fdgs_list[ig] < nb_vpairs else nb_vpairs) - for item in idx_change: - node1 = int(item / (nx.number_of_nodes(gs) - 1)) - node2 = (item - node1 * (nx.number_of_nodes(gs) - 1)) - if node2 >= node1: # skip the self pair. - node2 += 1 - # @todo: is the randomness correct? - if not gtemp.has_edge(node1, node2): - gtemp.add_edge(node1, node2) - else: - gtemp.remove_edge(node1, node2) - - # compute new distances. - kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, D_N, **self._kernel_options) - kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) - kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize - # @todo: not correct kernel value - gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) - gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) - dnew = compute_k_dis(0, range(1, 1 + len(D_N)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True) - - # get the better graph preimage. - if dnew <= dhat: # @todo: the new distance is smaller or also equal? - if dnew < dhat: - if self._verbose >= 2: - print('trail =', str(trail)) - print('\nI am smaller!') - print('index (as in D_k U {gihat} =', str(ig)) - print('distance:', dhat, '->', dnew) - self.__num_updates += 1 - elif dnew == dhat: - if self._verbose >= 2: - print('I am equal!') - dhat = dnew - gnew = gtemp.copy() - found = True # found better graph. + gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) if found: r = 0 @@ -220,10 +177,9 @@ class RandomPreimageGenerator(PreimageGenerator): dis_of_each_itr.append(dhat) self.__itrs += 1 if self._verbose >= 2: - print('Total number of iterations is', self.__itrs) + print('Total number of iterations is', self.__itrs, '.') print('The preimage is updated', self.__num_updates, 'times.') - print('The shortest distances for previous iterations are', dis_of_each_itr) - + print('The shortest distances for previous iterations are', dis_of_each_itr, '.') # get results and print. @@ -245,8 +201,149 @@ class RandomPreimageGenerator(PreimageGenerator): print('Time to generate pre-images:', self.__runtime_generate_preimage) print('Total time:', self.__runtime_total) print('=============================================================================') - print() + print() + + + def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3): + if self.__parallel: + gnew, dhat, found = self.__generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3) + else: + gnew, dhat, found = self.__generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3) + return gnew, dhat, found + + + def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3): + gnew = None + for trail in range(0, self.__l): + if self._verbose >= 2: + print('---', trail + 1, 'trail out of', self.__l) + + # add and delete edges. + gtemp = g_init.copy() + np.random.seed() # @todo: may not work for possible parallel. + # which edges to change. 
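+			# To illustrate the pair encoding used below: a graph with n nodes
+			# has nb_vpairs = n * (n - 1) ordered vertex pairs, and an index i
+			# decodes to node1 = i // (n - 1) and node2 = i % (n - 1), where
+			# node2 is shifted up by one when node2 >= node1 so that self-pairs
+			# are skipped. E.g. for n = 4 (12 ordered pairs), i = 7 decodes to
+			# (2, 1), while i = 8 gives node2 = 2 >= node1 = 2, shifted to (2, 3).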
+ # @todo: should we use just half of the adjacency matrix for undirected graphs? + nb_vpairs = nx.number_of_nodes(g_init) * (nx.number_of_nodes(g_init) - 1) + # @todo: what if fdgs is bigger than nb_vpairs? + idx_change = random.sample(range(nb_vpairs), fdgs if + fdgs < nb_vpairs else nb_vpairs) + for item in idx_change: + node1 = int(item / (nx.number_of_nodes(g_init) - 1)) + node2 = (item - node1 * (nx.number_of_nodes(g_init) - 1)) + if node2 >= node1: # skip the self pair. + node2 += 1 + # @todo: is the randomness correct? + if not gtemp.has_edge(node1, node2): + gtemp.add_edge(node1, node2) + else: + gtemp.remove_edge(node1, node2) + + # compute new distances. + kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options) + kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) + kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize + # @todo: not correct kernel value + gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) + gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) + dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True) + + # get the better graph preimage. + if dnew <= dhat: # @todo: the new distance is smaller or also equal? + if dnew < dhat: + if self._verbose >= 2: + print('trail =', str(trail)) + print('\nI am smaller!') + print('index (as in D_k U {gihat} =', str(ig)) + print('distance:', dhat, '->', dnew) + self.__num_updates += 1 + elif dnew == dhat: + if self._verbose >= 2: + print('I am equal!') + dhat = dnew + gnew = gtemp.copy() + found = True # found better graph. + + return gnew, dhat, found + + + def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3): + gnew = None + len_itr = self.__l + gnew_list = [None] * len_itr + dnew_list = [None] * len_itr + itr = range(0, len_itr) + n_jobs = multiprocessing.cpu_count() + if len_itr < 100 * n_jobs: + chunksize = int(len_itr / n_jobs) + 1 + else: + chunksize = 100 + do_fun = partial(self._generate_graph_parallel, g_init, fdgs, term3) + pool = Pool(processes=n_jobs) + if self._verbose >= 2: + iterator = tqdm(pool.imap_unordered(do_fun, itr, chunksize), + desc='Generating l graphs', file=sys.stdout) + else: + iterator = pool.imap_unordered(do_fun, itr, chunksize) + for idx, gnew, dnew in iterator: + gnew_list[idx] = gnew + dnew_list[idx] = dnew + pool.close() + pool.join() + + # check if get the better graph preimage. + idx_min = np.argmin(dnew_list) + dnew = dnew_list[idx_min] + if dnew <= dhat: # @todo: the new distance is smaller or also equal? + if dnew < dhat: + if self._verbose >= 2: + print('\nI am smaller!') + print('index (as in D_k U {gihat} =', str(ig)) + print('distance:', dhat, '->', dnew) + self.__num_updates += 1 + elif dnew == dhat: + if self._verbose >= 2: + print('I am equal!') + dhat = dnew + gnew = gnew_list[idx_min] + found = True # found better graph. + + return gnew, dhat, found + + + def _generate_graph_parallel(self, g_init, fdgs, term3, itr): + trail = itr + # add and delete edges. + gtemp = g_init.copy() + np.random.seed() # @todo: may not work for possible parallel. + # which edges to change. + # @todo: should we use just half of the adjacency matrix for undirected graphs? 
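+		# (On the @todo above: in an undirected graph the ordered pairs (a, b)
+		# and (b, a) denote the same edge, so a sample containing both toggles
+		# that edge twice and leaves it unchanged; enumerating only the
+		# n * (n - 1) / 2 unordered pairs would avoid such cancellations.)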
+		nb_vpairs = nx.number_of_nodes(g_init) * (nx.number_of_nodes(g_init) - 1)
+		# @todo: what if fdgs is bigger than nb_vpairs?
+		idx_change = random.sample(range(nb_vpairs), fdgs if
+						 fdgs < nb_vpairs else nb_vpairs)
+		for item in idx_change:
+			node1 = int(item / (nx.number_of_nodes(g_init) - 1))
+			node2 = (item - node1 * (nx.number_of_nodes(g_init) - 1))
+			if node2 >= node1: # skip the self pair.
+				node2 += 1
+			# @todo: is the randomness correct?
+			if not gtemp.has_edge(node1, node2):
+				gtemp.add_edge(node1, node2)
+			else:
+				gtemp.remove_edge(node1, node2)
+
+		# compute new distances.
+		kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options)
+		kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
+		kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize
+		# @todo: not correct kernel value
+		gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
+		gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
+		dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)
+
+		return trail, gtemp, dnew
+

 	def get_results(self):
 		results = {}

From 5c3651e12ca030206ba73f85943b4d39b488fbd7 Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Wed, 3 Jun 2020 12:27:30 +0200
Subject: [PATCH 04/17] Fix bug: random number generation in RPG when
 parallelizing.

---
 gklearn/preimage/random_preimage_generator.py | 63 ++++++++++-----------------
 gklearn/preimage/utils.py                     |  8 ++--
 2 files changed, 26 insertions(+), 45 deletions(-)

diff --git a/gklearn/preimage/random_preimage_generator.py b/gklearn/preimage/random_preimage_generator.py
index f3dfb48..70c5953 100644
--- a/gklearn/preimage/random_preimage_generator.py
+++ b/gklearn/preimage/random_preimage_generator.py
@@ -8,7 +8,6 @@ Created on Fri May 29 14:29:52 2020

 import numpy as np
 import time
-import random
 import sys
 from tqdm import tqdm
 import multiprocessing
@@ -157,7 +156,7 @@ class RandomPreimageGenerator(PreimageGenerator):
 			# @todo what if the log is negetive? how to choose alpha (scalar)?
 			fdgs_list = np.array(dis_bests)
 			if np.min(fdgs_list) < 1:
-				fdgs_list /= np.min(dis_bests)
+				fdgs_list /= np.min(fdgs_list)
 			fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
 			if np.min(fdgs_list) < 1:
 				fdgs_list = np.array(fdgs_list) + 1
@@ -214,54 +213,31 @@ class RandomPreimageGenerator(PreimageGenerator):

 	def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3):
 		gnew = None
-		for trail in range(0, self.__l):
+		updated = False
+		for trial in range(0, self.__l):
 			if self._verbose >= 2:
-				print('---', trail + 1, 'trail out of', self.__l)
+				print('---', trial + 1, 'trial out of', self.__l)

-			# add and delete edges.
-			gtemp = g_init.copy()
-			np.random.seed() # @todo: may not work for possible parallel.
-			# which edges to change.
-			# @todo: should we use just half of the adjacency matrix for undirected graphs?
-			nb_vpairs = nx.number_of_nodes(g_init) * (nx.number_of_nodes(g_init) - 1)
-			# @todo: what if fdgs is bigger than nb_vpairs?
-			idx_change = random.sample(range(nb_vpairs), fdgs if
-							 fdgs < nb_vpairs else nb_vpairs)
-			for item in idx_change:
-				node1 = int(item / (nx.number_of_nodes(g_init) - 1))
-				node2 = (item - node1 * (nx.number_of_nodes(g_init) - 1))
-				if node2 >= node1: # skip the self pair.
- node2 += 1 - # @todo: is the randomness correct? - if not gtemp.has_edge(node1, node2): - gtemp.add_edge(node1, node2) - else: - gtemp.remove_edge(node1, node2) - - # compute new distances. - kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options) - kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) - kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize - # @todo: not correct kernel value - gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) - gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) - dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True) + gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) # get the better graph preimage. if dnew <= dhat: # @todo: the new distance is smaller or also equal? if dnew < dhat: if self._verbose >= 2: - print('trail =', str(trail)) + print('trial =', str(trial)) print('\nI am smaller!') print('index (as in D_k U {gihat} =', str(ig)) print('distance:', dhat, '->', dnew) - self.__num_updates += 1 + updated = True elif dnew == dhat: if self._verbose >= 2: print('I am equal!') dhat = dnew gnew = gtemp.copy() - found = True # found better graph. + found = True # found better or equally good graph. + + if updated: + self.__num_updates += 1 return gnew, dhat, found @@ -311,17 +287,22 @@ class RandomPreimageGenerator(PreimageGenerator): def _generate_graph_parallel(self, g_init, fdgs, term3, itr): - trail = itr - + trial = itr + gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) + return trial, gtemp, dnew + + + def __do_trial(self, g_init, fdgs, term3, trial): # add and delete edges. gtemp = g_init.copy() - np.random.seed() # @todo: may not work for possible parallel. + seed = (trial + int(time.time())) % (2 ** 32 - 1) + rdm_state = np.random.RandomState(seed=seed) # which edges to change. # @todo: should we use just half of the adjacency matrix for undirected graphs? nb_vpairs = nx.number_of_nodes(g_init) * (nx.number_of_nodes(g_init) - 1) # @todo: what if fdgs is bigger than nb_vpairs? 
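+		# Seeding a private RandomState from the trial index plus the current
+		# time gives each trial its own reproducible stream, including in
+		# forked worker processes; the former np.random.seed() call did not
+		# reseed the `random` module that random.sample drew from, so parallel
+		# workers could produce identical samples.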
-			idx_change = random.sample(range(nb_vpairs), fdgs if
-							 fdgs < nb_vpairs else nb_vpairs)
+			idx_change = rdm_state.randint(0, high=nb_vpairs, size=(fdgs if
+							 fdgs < nb_vpairs else nb_vpairs))
 			for item in idx_change:
 				node1 = int(item / (nx.number_of_nodes(g_init) - 1))
 				node2 = (item - node1 * (nx.number_of_nodes(g_init) - 1))
@@ -342,7 +323,7 @@ class RandomPreimageGenerator(PreimageGenerator):
 			gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
 			dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)

-		return trail, gtemp, dnew
+		return gtemp, dnew


 	def get_results(self):
diff --git a/gklearn/preimage/utils.py b/gklearn/preimage/utils.py
index 6bc10c7..d4d5d05 100644
--- a/gklearn/preimage/utils.py
+++ b/gklearn/preimage/utils.py
@@ -387,15 +387,15 @@ def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
 	return np.sqrt(term1 - term2 + term3)


-def compute_k_dis(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
+def compute_k_dis(idx_g, idx_gi, alphas, Kmatrix, term3=0, withterm3=True):
 	term1 = Kmatrix[idx_g, idx_g]
 	term2 = 0
-	for i, a in enumerate(alpha):
+	for i, a in enumerate(alphas):
 		term2 += a * Kmatrix[idx_g, idx_gi[i]]
 	term2 *= 2
 	if withterm3 == False:
-		for i1, a1 in enumerate(alpha):
-			for i2, a2 in enumerate(alpha):
+		for i1, a1 in enumerate(alphas):
+			for i2, a2 in enumerate(alphas):
 				term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
 	return np.sqrt(term1 - term2 + term3)


From 87e06e9812dd408b09f7f5d7a8c60dded187caed Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Wed, 3 Jun 2020 15:45:36 +0200
Subject: [PATCH 05/17] Expand the number of modifications to increase the
 possibility in RPG.

---
 gklearn/preimage/random_preimage_generator.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/gklearn/preimage/random_preimage_generator.py b/gklearn/preimage/random_preimage_generator.py
index 70c5953..d4d603c 100644
--- a/gklearn/preimage/random_preimage_generator.py
+++ b/gklearn/preimage/random_preimage_generator.py
@@ -155,11 +155,27 @@ class RandomPreimageGenerator(PreimageGenerator):
 			# compute numbers of edges to be inserted/deleted.
 			# @todo what if the log is negetive? how to choose alpha (scalar)?
 			fdgs_list = np.array(dis_bests)
-			if np.min(fdgs_list) < 1:
+			if np.min(fdgs_list) < 1: # in case the log is negative.
 				fdgs_list /= np.min(fdgs_list)
 			fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
-			if np.min(fdgs_list) < 1:
+			if np.min(fdgs_list) < 1: # in case the log is smaller than 1.
 				fdgs_list = np.array(fdgs_list) + 1
+			# expand the number of modifications to increase the possibility.
+			nb_vpairs_list = [nx.number_of_nodes(g) * (nx.number_of_nodes(g) - 1) for g in (Gs_nearest + gihat_list)]
+			nb_vpairs_min = np.min(nb_vpairs_list)
+			idx_fdgs_max = np.argmax(fdgs_list)
+			fdgs_max_old = fdgs_list[idx_fdgs_max]
+			fdgs_max = fdgs_max_old
+			nb_modif = 1
+			for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)):
+				nb_modif *= nb / (fdgs_max - idx)
+			while fdgs_max < nb_vpairs_min and nb_modif < self.__l:
+				fdgs_max += 1
+				nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max
+			nb_increase = int(fdgs_max - fdgs_max_old)
+			if nb_increase > 0:
+				fdgs_list += 1
+
 			for ig, gs in enumerate(Gs_nearest + gihat_list):
 				if self._verbose >= 2:
@@ -303,6 +319,7 @@ class RandomPreimageGenerator(PreimageGenerator):
 		# @todo: what if fdgs is bigger than nb_vpairs?
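+		# Note: rdm_state.randint draws indices with replacement, unlike the
+		# random.sample call it replaces, so idx_change may contain duplicates;
+		# a duplicated index toggles the same vertex pair twice, and fewer than
+		# fdgs edges may then actually be modified in a trial.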
idx_change = rdm_state.randint(0, high=nb_vpairs, size=(fdgs if fdgs < nb_vpairs else nb_vpairs)) +# print(idx_change) for item in idx_change: node1 = int(item / (nx.number_of_nodes(g_init) - 1)) node2 = (item - node1 * (nx.number_of_nodes(g_init) - 1)) From b4f4f384ded57b6f2ac7b192c5a9f534066e50f4 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 5 Jun 2020 14:39:26 +0200 Subject: [PATCH 06/17] Add time and iteration limits for RPG. --- gklearn/preimage/random_preimage_generator.py | 30 +++++++++++++++------------ 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/gklearn/preimage/random_preimage_generator.py b/gklearn/preimage/random_preimage_generator.py index d4d603c..5ac9353 100644 --- a/gklearn/preimage/random_preimage_generator.py +++ b/gklearn/preimage/random_preimage_generator.py @@ -31,8 +31,8 @@ class RandomPreimageGenerator(PreimageGenerator): self.__alphas = None # weights of linear combinations of points in kernel space. self.__parallel = True self.__n_jobs = multiprocessing.cpu_count() - self.__time_limit_in_sec = 0 # @todo - self.__max_itrs = 100 # @todo + self.__time_limit_in_sec = 0 + self.__max_itrs = 20 # values to compute. self.__runtime_generate_preimage = None self.__runtime_total = None @@ -59,7 +59,7 @@ class RandomPreimageGenerator(PreimageGenerator): self.__parallel = kwargs.get('parallel', True) self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) - self.__max_itrs = kwargs.get('max_itrs', 100) + self.__max_itrs = kwargs.get('max_itrs', 20) self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) @@ -147,7 +147,10 @@ class RandomPreimageGenerator(PreimageGenerator): dis_of_each_itr = [dhat] if self.__parallel: self._kernel_options['parallel'] = None - while r < self.__r_max: + self.__itrs = 0 + self.__num_updates = 0 + timer = Timer(self.__time_limit_in_sec) + while not self.__termination_criterion_met(timer, self.__itrs, r): print('\n- r =', r) found = False dis_bests = dis_gs + dihat_list @@ -238,14 +241,14 @@ class RandomPreimageGenerator(PreimageGenerator): # get the better graph preimage. if dnew <= dhat: # @todo: the new distance is smaller or also equal? - if dnew < dhat: + if dhat - dnew > 1e-6: if self._verbose >= 2: print('trial =', str(trial)) print('\nI am smaller!') print('index (as in D_k U {gihat} =', str(ig)) print('distance:', dhat, '->', dnew) updated = True - elif dnew == dhat: + else: if self._verbose >= 2: print('I am equal!') dhat = dnew @@ -286,13 +289,13 @@ class RandomPreimageGenerator(PreimageGenerator): idx_min = np.argmin(dnew_list) dnew = dnew_list[idx_min] if dnew <= dhat: # @todo: the new distance is smaller or also equal? - if dnew < dhat: + if dhat - dnew > 1e-6: # @todo: use a proportion and watch out for 0. 
if self._verbose >= 2: - print('\nI am smaller!') - print('index (as in D_k U {gihat} =', str(ig)) - print('distance:', dhat, '->', dnew) + print('I am smaller!') + print('index (as in D_k U {gihat}) =', str(ig)) + print('distance:', dhat, '->', dnew, '\n') self.__num_updates += 1 - elif dnew == dhat: + else: if self._verbose >= 2: print('I am equal!') dhat = dnew @@ -355,12 +358,13 @@ class RandomPreimageGenerator(PreimageGenerator): return results - def __termination_criterion_met(self, converged, timer, itr, itrs_without_update): + def __termination_criterion_met(self, timer, itr, r): if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): # if self.__state == AlgorithmState.TERMINATED: # self.__state = AlgorithmState.INITIALIZED return True - return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) + return (r >= self.__r_max if self.__r_max >= 0 else False) +# return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) @property From e66846f2a86214e2a8c752e3134b6821854455f9 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 5 Jun 2020 14:40:51 +0200 Subject: [PATCH 07/17] Implement Marginalized kernel class. --- gklearn/kernels/__init__.py | 3 +- gklearn/kernels/marginalized.py | 338 ++++++++++++++++++++++++++++++++++++++++ gklearn/kernels/treelet.py | 2 +- gklearn/utils/utils.py | 8 +- 4 files changed, 348 insertions(+), 3 deletions(-) create mode 100644 gklearn/kernels/marginalized.py diff --git a/gklearn/kernels/__init__.py b/gklearn/kernels/__init__.py index bcb7b06..e642043 100644 --- a/gklearn/kernels/__init__.py +++ b/gklearn/kernels/__init__.py @@ -8,8 +8,9 @@ __author__ = "Linlin Jia" __date__ = "November 2018" from gklearn.kernels.graph_kernel import GraphKernel -from gklearn.kernels.structural_sp import StructuralSP +from gklearn.kernels.marginalized import Marginalized from gklearn.kernels.shortest_path import ShortestPath +from gklearn.kernels.structural_sp import StructuralSP from gklearn.kernels.path_up_to_h import PathUpToH from gklearn.kernels.treelet import Treelet from gklearn.kernels.weisfeiler_lehman import WeisfeilerLehman, WLSubtree diff --git a/gklearn/kernels/marginalized.py b/gklearn/kernels/marginalized.py new file mode 100644 index 0000000..6ddec43 --- /dev/null +++ b/gklearn/kernels/marginalized.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Jun 3 22:22:57 2020 + +@author: ljia + +@references: + + [1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between + labeled graphs. In Proceedings of the 20th International Conference on + Machine Learning, Washington, DC, United States, 2003. + + [2] Pierre Mahé, Nobuhisa Ueda, Tatsuya Akutsu, Jean-Luc Perret, and + Jean-Philippe Vert. Extensions of marginalized graph kernels. In + Proceedings of the twenty-first international conference on Machine + learning, page 70. ACM, 2004. 
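+
+	In brief, the marginalized kernel compares two graphs by averaging a
+	label-sequence kernel over their random walks: K(G1, G2) is the sum over
+	walk pairs (h1, h2) of p(h1|G1) * p(h2|G2) * k(h1, h2), where each walk
+	halts with probability p_quit at every step (see [1]).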
+""" + +import sys +from multiprocessing import Pool +from tqdm import tqdm +import numpy as np +import networkx as nx +from gklearn.utils import SpecialLabel +from gklearn.utils.kernels import deltakernel +from gklearn.utils.parallel import parallel_gm, parallel_me +from gklearn.utils.utils import untotterTransformation +from gklearn.kernels import GraphKernel + + +class Marginalized(GraphKernel): + + def __init__(self, **kwargs): + GraphKernel.__init__(self) + self.__node_labels = kwargs.get('node_labels', []) + self.__edge_labels = kwargs.get('edge_labels', []) + self.__p_quit = kwargs.get('p_quit', 0.5) + self.__n_iteration = kwargs.get('n_iteration', 10) + self.__remove_totters = kwargs.get('remove_totters', False) + self.__ds_infos = kwargs.get('ds_infos', {}) + self.__n_iteration = int(self.__n_iteration) + + + def _compute_gm_series(self): + self.__add_dummy_labels(self._graphs) + + if self.__remove_totters: + if self._verbose >= 2: + iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout) + else: + iterator = self._graphs + # @todo: this may not work. + self._graphs = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator] + + # compute Gram matrix. + gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) + + from itertools import combinations_with_replacement + itr = combinations_with_replacement(range(0, len(self._graphs)), 2) + if self._verbose >= 2: + iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) + else: + iterator = itr + for i, j in iterator: + kernel = self.__kernel_do(self._graphs[i], self._graphs[j]) + gram_matrix[i][j] = kernel + gram_matrix[j][i] = kernel # @todo: no directed graph considered? + + return gram_matrix + + + def _compute_gm_imap_unordered(self): + self.__add_dummy_labels(self._graphs) + + if self.__remove_totters: + pool = Pool(self._n_jobs) + itr = range(0, len(self._graphs)) + if len(self._graphs) < 100 * self._n_jobs: + chunksize = int(len(self._graphs) / self._n_jobs) + 1 + else: + chunksize = 100 + remove_fun = self._wrapper_untotter + if self._verbose >= 2: + iterator = tqdm(pool.imap_unordered(remove_fun, itr, chunksize), + desc='removing tottering', file=sys.stdout) + else: + iterator = pool.imap_unordered(remove_fun, itr, chunksize) + for i, g in iterator: + self._graphs[i] = g + pool.close() + pool.join() + + # compute Gram matrix. + gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) + + def init_worker(gn_toshare): + global G_gn + G_gn = gn_toshare + do_fun = self._wrapper_kernel_do + parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, + glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose) + + return gram_matrix + + + def _compute_kernel_list_series(self, g1, g_list): + self.__add_dummy_labels(g_list + [g1]) + + if self.__remove_totters: + g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work. + if self._verbose >= 2: + iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout) + else: + iterator = g_list + # @todo: this may not work. + g_list = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator] + + # compute kernel list. 
+ kernel_list = [None] * len(g_list) + if self._verbose >= 2: + iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) + else: + iterator = range(len(g_list)) + for i in iterator: + kernel = self.__kernel_do(g1, g_list[i]) + kernel_list[i] = kernel + + return kernel_list + + + def _compute_kernel_list_imap_unordered(self, g1, g_list): + self.__add_dummy_labels(g_list + [g1]) + + if self.__remove_totters: + g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work. + pool = Pool(self._n_jobs) + itr = range(0, len(g_list)) + if len(g_list) < 100 * self._n_jobs: + chunksize = int(len(g_list) / self._n_jobs) + 1 + else: + chunksize = 100 + remove_fun = self._wrapper_untotter + if self._verbose >= 2: + iterator = tqdm(pool.imap_unordered(remove_fun, itr, chunksize), + desc='removing tottering', file=sys.stdout) + else: + iterator = pool.imap_unordered(remove_fun, itr, chunksize) + for i, g in iterator: + g_list[i] = g + pool.close() + pool.join() + + # compute kernel list. + kernel_list = [None] * len(g_list) + + def init_worker(g1_toshare, g_list_toshare): + global G_g1, G_g_list + G_g1 = g1_toshare + G_g_list = g_list_toshare + do_fun = self._wrapper_kernel_list_do + def func_assign(result, var_to_assign): + var_to_assign[result[0]] = result[1] + itr = range(len(g_list)) + len_itr = len(g_list) + parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, + init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', + n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) + + return kernel_list + + + def _wrapper_kernel_list_do(self, itr): + return itr, self.__kernel_do(G_g1, G_g_list[itr]) + + + def _compute_single_kernel_series(self, g1, g2): + self.__add_dummy_labels([g1] + [g2]) + if self.__remove_totters: + g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work. + g2 = untotterTransformation(g2, self.__node_label, self.__edge_label) + kernel = self.__kernel_do(g1, g2) + return kernel + + + def __kernel_do(self, g1, g2): + """Calculate marginalized graph kernel between 2 graphs. + + Parameters + ---------- + g1, g2 : NetworkX graphs + 2 graphs between which the kernel is calculated. + + Return + ------ + kernel : float + Marginalized kernel between 2 graphs. 
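+
+		Notes
+		-----
+		R_inf (ref [1], equation (8)) is approximated by iterating the
+		recursion a fixed number of times (n_iteration) rather than by
+		solving the underlying linear system exactly.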
+ """ + # init parameters + kernel = 0 + num_nodes_G1 = nx.number_of_nodes(g1) + num_nodes_G2 = nx.number_of_nodes(g2) + # the initial probability distribution in the random walks generating step + # (uniform distribution over |G|) + p_init_G1 = 1 / num_nodes_G1 + p_init_G2 = 1 / num_nodes_G2 + + q = self.__p_quit * self.__p_quit + r1 = q + + # # initial R_inf + # # matrix to save all the R_inf for all pairs of nodes + # R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) + # + # # calculate R_inf with a simple interative method + # for i in range(1, n_iteration): + # R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) + # R_inf_new.fill(r1) + # + # # calculate R_inf for each pair of nodes + # for node1 in g1.nodes(data=True): + # neighbor_n1 = g1[node1[0]] + # # the transition probability distribution in the random walks + # # generating step (uniform distribution over the vertices adjacent + # # to the current vertex) + # if len(neighbor_n1) > 0: + # p_trans_n1 = (1 - p_quit) / len(neighbor_n1) + # for node2 in g2.nodes(data=True): + # neighbor_n2 = g2[node2[0]] + # if len(neighbor_n2) > 0: + # p_trans_n2 = (1 - p_quit) / len(neighbor_n2) + # + # for neighbor1 in neighbor_n1: + # for neighbor2 in neighbor_n2: + # t = p_trans_n1 * p_trans_n2 * \ + # deltakernel(g1.node[neighbor1][node_label], + # g2.node[neighbor2][node_label]) * \ + # deltakernel( + # neighbor_n1[neighbor1][edge_label], + # neighbor_n2[neighbor2][edge_label]) + # + # R_inf_new[node1[0]][node2[0]] += t * R_inf[neighbor1][ + # neighbor2] # ref [1] equation (8) + # R_inf[:] = R_inf_new + # + # # add elements of R_inf up and calculate kernel + # for node1 in g1.nodes(data=True): + # for node2 in g2.nodes(data=True): + # s = p_init_G1 * p_init_G2 * deltakernel( + # node1[1][node_label], node2[1][node_label]) + # kernel += s * R_inf[node1[0]][node2[0]] # ref [1] equation (6) + + + R_inf = {} # dict to save all the R_inf for all pairs of nodes + # initial R_inf, the 1st iteration. + for node1 in g1.nodes(): + for node2 in g2.nodes(): + # R_inf[(node1[0], node2[0])] = r1 + if len(g1[node1]) > 0: + if len(g2[node2]) > 0: + R_inf[(node1, node2)] = r1 + else: + R_inf[(node1, node2)] = self.__p_quit + else: + if len(g2[node2]) > 0: + R_inf[(node1, node2)] = self.__p_quit + else: + R_inf[(node1, node2)] = 1 + + # compute all transition probability first. 
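+		# t_dict[(node1, node2, neighbor1, neighbor2)] holds the probability
+		# that the two walks step synchronously from (node1, node2) to
+		# (neighbor1, neighbor2), multiplied by the delta kernels of the
+		# traversed node and edge labels; it is the coefficient of
+		# R_inf_old[(neighbor1, neighbor2)] in equation (8) of [1].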
+ t_dict = {} + if self.__n_iteration > 1: + for node1 in g1.nodes(): + neighbor_n1 = g1[node1] + # the transition probability distribution in the random walks + # generating step (uniform distribution over the vertices adjacent + # to the current vertex) + if len(neighbor_n1) > 0: + p_trans_n1 = (1 - self.__p_quit) / len(neighbor_n1) + for node2 in g2.nodes(): + neighbor_n2 = g2[node2] + if len(neighbor_n2) > 0: + p_trans_n2 = (1 - self.__p_quit) / len(neighbor_n2) + for neighbor1 in neighbor_n1: + for neighbor2 in neighbor_n2: + t_dict[(node1, node2, neighbor1, neighbor2)] = \ + p_trans_n1 * p_trans_n2 * \ + deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \ + deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels)) + + # calculate R_inf with a simple interative method + for i in range(2, self.__n_iteration + 1): + R_inf_old = R_inf.copy() + + # calculate R_inf for each pair of nodes + for node1 in g1.nodes(): + neighbor_n1 = g1[node1] + # the transition probability distribution in the random walks + # generating step (uniform distribution over the vertices adjacent + # to the current vertex) + if len(neighbor_n1) > 0: + for node2 in g2.nodes(): + neighbor_n2 = g2[node2] + if len(neighbor_n2) > 0: + R_inf[(node1, node2)] = r1 + for neighbor1 in neighbor_n1: + for neighbor2 in neighbor_n2: + R_inf[(node1, node2)] += \ + (t_dict[(node1, node2, neighbor1, neighbor2)] * \ + R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) + + # add elements of R_inf up and calculate kernel + for (n1, n2), value in R_inf.items(): + s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels)) + kernel += s * value # ref [1] equation (6) + + return kernel + + + def _wrapper_kernel_do(self, itr): + i = itr[0] + j = itr[1] + return i, j, self.__kernel_do(G_gn[i], G_gn[j]) + + + def _wrapper_untotter(self, i): + return i, untotterTransformation(self._graphs[i], self.__node_label, self.__edge_label) # @todo: this may not work. + + + def __add_dummy_labels(self, Gn): + if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): + for i in range(len(Gn)): + nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) + self.__node_labels = [SpecialLabel.DUMMY] + if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): + for i in range(len(Gn)): + nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) + self.__edge_labels = [SpecialLabel.DUMMY] \ No newline at end of file diff --git a/gklearn/kernels/treelet.py b/gklearn/kernels/treelet.py index e05ee0c..c3204ec 100644 --- a/gklearn/kernels/treelet.py +++ b/gklearn/kernels/treelet.py @@ -195,7 +195,7 @@ class Treelet(GraphKernel): Return ------ kernel : float - Treelet Kernel between 2 graphs. + Treelet kernel between 2 graphs. 
""" keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) diff --git a/gklearn/utils/utils.py b/gklearn/utils/utils.py index 868f0f6..faa4ae6 100644 --- a/gklearn/utils/utils.py +++ b/gklearn/utils/utils.py @@ -300,7 +300,13 @@ def get_edge_labels(Gn, edge_label): def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}): - if name == 'ShortestPath': + if name == 'Marginalized': + from gklearn.kernels import Marginalized + graph_kernel = Marginalized(node_labels=node_labels, + edge_labels=edge_labels, + ds_infos=ds_infos, + **kernel_options) + elif name == 'ShortestPath': from gklearn.kernels import ShortestPath graph_kernel = ShortestPath(node_labels=node_labels, node_attrs=node_attrs, From 924f028c0c1c65587c0053285f2123e3a28631dd Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 5 Jun 2020 14:41:49 +0200 Subject: [PATCH 08/17] Update the RPG experiment. --- .../experiments/xp_random_preimage_generation.py | 191 ++++++++++++++++++--- 1 file changed, 163 insertions(+), 28 deletions(-) diff --git a/gklearn/preimage/experiments/xp_random_preimage_generation.py b/gklearn/preimage/experiments/xp_random_preimage_generation.py index 51f7a7b..3bffc00 100644 --- a/gklearn/preimage/experiments/xp_random_preimage_generation.py +++ b/gklearn/preimage/experiments/xp_random_preimage_generation.py @@ -9,6 +9,7 @@ import multiprocessing import numpy as np import networkx as nx import os +from gklearn.utils.graphfiles import saveGXL from gklearn.preimage import RandomPreimageGenerator from gklearn.utils import Dataset @@ -16,7 +17,7 @@ from gklearn.utils import Dataset dir_root = '../results/xp_random_preimage_generation/' -def xp_random_preimage_generation(): +def xp_random_preimage_generation(kernel_name): """ Experiment similar to the one in Bakir's paper. A test to check if RandomPreimageGenerator class works correctly. @@ -40,15 +41,26 @@ def xp_random_preimage_generation(): 'alphas': None, 'parallel': True, 'verbose': 2} - kernel_options = {'name': 'PathUpToH', - 'depth': 2, # - 'k_func': 'MinMax', # - 'compute_method': 'trie', - 'parallel': 'imap_unordered', - # 'parallel': None, - 'n_jobs': multiprocessing.cpu_count(), - 'normalize': True, - 'verbose': 0} + if kernel_name == 'PathUpToH': + kernel_options = {'name': 'PathUpToH', + 'depth': 2, # + 'k_func': 'MinMax', # + 'compute_method': 'trie', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} + elif kernel_name == 'Marginalized': + kernel_options = {'name': 'Marginalized', + 'p_quit': 0.8, # + 'n_iteration': 7, # + 'remove_totters': False, + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 0} edge_required = True irrelevant_labels = {'edge_labels': ['label_0']} cut_range = None @@ -74,25 +86,30 @@ def xp_random_preimage_generation(): if cut_range is not None: dataset_all.cut_graphs(cut_range) - # add two "random" graphs. 
- g1 = nx.Graph() - g1.add_nodes_from(range(0, 16), label_0='0') - g1.add_nodes_from(range(16, 25), label_0='1') - g1.add_node(25, label_0='2') - g1.add_nodes_from([26, 27], label_0='3') - g1.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (5, 0), (4, 9), (12, 3), (10, 13), (13, 14), (14, 15), (15, 8), (0, 16), (1, 17), (2, 18), (12, 19), (11, 20), (13, 21), (15, 22), (7, 23), (6, 24), (14, 25), (25, 26), (25, 27)]) - g2 = nx.Graph() - g2.add_nodes_from(range(0, 12), label_0='0') - g2.add_nodes_from(range(12, 19), label_0='1') - g2.add_nodes_from([19, 20, 21], label_0='2') - g2.add_nodes_from([22, 23], label_0='3') - g2.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 19), (19, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 20), (20, 7), (5, 0), (4, 8), (0, 12), (1, 13), (2, 14), (9, 15), (10, 16), (11, 17), (6, 18), (3, 21), (21, 22), (21, 23)]) - dataset_all.load_graphs([g1, g2] + dataset_all.graphs, targets=None) +# # add two "random" graphs. +# g1 = nx.Graph() +# g1.add_nodes_from(range(0, 16), label_0='0') +# g1.add_nodes_from(range(16, 25), label_0='1') +# g1.add_node(25, label_0='2') +# g1.add_nodes_from([26, 27], label_0='3') +# g1.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (5, 0), (4, 9), (12, 3), (10, 13), (13, 14), (14, 15), (15, 8), (0, 16), (1, 17), (2, 18), (12, 19), (11, 20), (13, 21), (15, 22), (7, 23), (6, 24), (14, 25), (25, 26), (25, 27)]) +# g2 = nx.Graph() +# g2.add_nodes_from(range(0, 12), label_0='0') +# g2.add_nodes_from(range(12, 19), label_0='1') +# g2.add_nodes_from([19, 20, 21], label_0='2') +# g2.add_nodes_from([22, 23], label_0='3') +# g2.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 19), (19, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 20), (20, 7), (5, 0), (4, 8), (0, 12), (1, 13), (2, 14), (9, 15), (10, 16), (11, 17), (6, 18), (3, 21), (21, 22), (21, 23)]) +# dataset_all.load_graphs([g1, g2] + dataset_all.graphs, targets=None) # 2. initialize rpg and setting parameters. print('2. initializing rpg and setting parameters...') - nb_graphs = len(dataset_all.graphs) - 2 - rpg_options['alphas'] = [alpha1, 1 - alpha1] + [0] * nb_graphs +# nb_graphs = len(dataset_all.graphs) - 2 +# rpg_options['alphas'] = [alpha1, 1 - alpha1] + [0] * nb_graphs + nb_graphs = len(dataset_all.graphs) + alphas = [0] * nb_graphs + alphas[1] = alpha1 + alphas[6] = 1 - alpha1 + rpg_options['alphas'] = alphas if gmfile_exist: rpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm rpg_options['runtime_precompute_gm'] = time_precompute_gm @@ -114,11 +131,129 @@ def xp_random_preimage_generation(): # write Gram matrices to file. if not gmfile_exist: np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=rpg.gram_matrix_unnorm, run_time=results['runtime_precompute_gm']) - + + # save graphs. + fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3] + saveGXL(rpg.best_from_dataset, fn_best_dataset + '.gxl', method='default', + node_labels=dataset_all.node_labels, edge_labels=dataset_all.edge_labels, + node_attrs=dataset_all.node_attrs, edge_attrs=dataset_all.edge_attrs) + fn_preimage = dir_save + 'g_preimage.' 
+ 'alpha1_' + str(alpha1)[0:3] + saveGXL(rpg.preimage, fn_preimage + '.gxl', method='default', + node_labels=dataset_all.node_labels, edge_labels=dataset_all.edge_labels, + node_attrs=dataset_all.node_attrs, edge_attrs=dataset_all.edge_attrs) + + # draw graphs. + __draw_graph(rpg.best_from_dataset, fn_best_dataset) + __draw_graph(rpg.preimage, fn_preimage) + + # plot results figure. + __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save) + print('\ncomplete.\n') return k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages +def __draw_graph(graph, file_prefix): +# import matplotlib +# matplotlib.use('agg') + import matplotlib.pyplot as plt + plt.figure() + pos = nx.spring_layout(graph) + nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'label_0'), font_color='w', width=3, with_labels=True) + plt.savefig(file_prefix + '.eps', format='eps', dpi=300) +# plt.show() + plt.clf() + plt.close() + + +def __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save): + import matplotlib.pyplot as plt + fig, ax = plt.subplots(1, 1, figsize=(7, 4.5)) + + ind = np.arange(len(alpha1_list)) # the x locations for the groups + width = 0.35 # the width of the bars: can also be len(x) sequence + + p1 = ax.bar(ind, k_dis_preimages, width, label='Reconstructed pre-image', zorder=3, color='#133AAC') + + ax.set_xlabel(r'$\alpha \in [0,1]$') + ax.set_ylabel(r'$d(g_i,g^\star(\alpha))$') + #ax.set_title('Runtime of the shortest path kernel on all datasets') + plt.xticks(ind, [str(i)[0:3] for i in alpha1_list]) + #ax.set_yticks(np.logspace(-16, -3, num=20, base=10)) + #ax.set_ylim(bottom=1e-15) + ax.grid(axis='y', zorder=0) + ax.spines['top'].set_visible(False) + ax.spines['bottom'].set_visible(False) + ax.spines['left'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.xaxis.set_ticks_position('none') + + p2 = ax.plot(ind, k_dis_datasets, 'b.-', label=r'Nearest neighbor in $D_N$', color='orange', zorder=4) + ax.yaxis.set_ticks_position('none') + + fig.subplots_adjust(bottom=.2) + fig.legend(loc='lower center', ncol=2, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6) + + plt.savefig(dir_save + 'distances in kernel space.eps', format='eps', dpi=300, + transparent=True, bbox_inches='tight') + plt.show() + plt.clf() + plt.close() + + if __name__ == '__main__': - k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages = xp_random_preimage_generation() \ No newline at end of file +# kernel_name = 'PathUpToH' + kernel_name = 'Marginalized' + k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages = xp_random_preimage_generation(kernel_name) + +# # save graphs. +# dir_save = dir_root + 'MUTAG.PathUpToH/' +# for i, alpha1 in enumerate(np.linspace(0, 1, 11)): +# fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3] +# saveGXL(bests_from_dataset[i], fn_best_dataset + '.gxl', method='default', +# node_labels=['label_0'], edge_labels=[], +# node_attrs=[], edge_attrs=[]) +# fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3] +# saveGXL(preimages[i], fn_preimage + '.gxl', method='default', +# node_labels=['label_0'], edge_labels=[], +# node_attrs=[], edge_attrs=[]) + +# # draw graphs. +# dir_save = dir_root + 'MUTAG.PathUpToH/' +# for i, alpha1 in enumerate(np.linspace(0, 1, 11)): +# fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3] +# __draw_graph(bests_from_dataset[i], fn_best_dataset) +# fn_preimage = dir_save + 'g_preimage.' 
+ 'alpha1_' + str(alpha1)[0:3] +# __draw_graph(preimages[i], fn_preimage) + +# # plot results figure. +# alpha1_list = np.linspace(0, 1, 11) +# dir_save = dir_root + 'MUTAG.PathUpToH/' +# __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save) + + + +# k_dis_datasets = [0.0, +# 0.08882515554098754, +# 0.17765031108197632, +# 0.2664754666229643, +# 0.35530062216395264, +# 0.44412577770494066, +# 0.35530062216395236, +# 0.2664754666229643, +# 0.17765031108197632, +# 0.08882515554098878, +# 0.0] + +# k_dis_preimages = [0.0, +# 0.08882515554098754, +# 0.17765031108197632, +# 0.2664754666229643, +# 0.35530062216395264, +# 0.44412577770494066, +# 0.35530062216395236, +# 0.2664754666229643, +# 0.17765031108197632, +# 0.08882515554098878, +# 0.0] \ No newline at end of file From e872ffe3b56205017dfabbf86d6c0b8b850c20e0 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Sun, 7 Jun 2020 10:25:36 +0200 Subject: [PATCH 09/17] Update random preimage experiment. --- gklearn/preimage/experiments/xp_random_preimage_generation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/gklearn/preimage/experiments/xp_random_preimage_generation.py b/gklearn/preimage/experiments/xp_random_preimage_generation.py index 3bffc00..fc328ca 100644 --- a/gklearn/preimage/experiments/xp_random_preimage_generation.py +++ b/gklearn/preimage/experiments/xp_random_preimage_generation.py @@ -146,9 +146,12 @@ def xp_random_preimage_generation(kernel_name): __draw_graph(rpg.best_from_dataset, fn_best_dataset) __draw_graph(rpg.preimage, fn_preimage) + # save distances. + np.savez(dir_save + 'distances.' + ds_name + '.' + kernel_options['name'], k_dis_datasets=k_dis_datasets, k_dis_preimages=k_dis_preimages) + # plot results figure. __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save) - + print('\ncomplete.\n') return k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages @@ -174,7 +177,7 @@ def __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save): ind = np.arange(len(alpha1_list)) # the x locations for the groups width = 0.35 # the width of the bars: can also be len(x) sequence - p1 = ax.bar(ind, k_dis_preimages, width, label='Reconstructed pre-image', zorder=3, color='#133AAC') + ax.bar(ind, k_dis_preimages, width, label='Reconstructed pre-image', zorder=3, color='#133AAC') ax.set_xlabel(r'$\alpha \in [0,1]$') ax.set_ylabel(r'$d(g_i,g^\star(\alpha))$') @@ -189,7 +192,7 @@ def __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save): ax.spines['right'].set_visible(False) ax.xaxis.set_ticks_position('none') - p2 = ax.plot(ind, k_dis_datasets, 'b.-', label=r'Nearest neighbor in $D_N$', color='orange', zorder=4) + ax.plot(ind, k_dis_datasets, 'b.-', label=r'Nearest neighbor in $D_N$', color='orange', zorder=4) ax.yaxis.set_ticks_position('none') fig.subplots_adjust(bottom=.2) From ea615e8c6899d1101a4deede1dfdf761011164cc Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Sun, 7 Jun 2020 10:58:03 +0200 Subject: [PATCH 10/17] Add more possibilities when using random costs in MPG. 
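
Edit costs are now drawn from {1, ..., 999} and rescaled, instead of from
{1, ..., 9}, which widens the searched cost space considerably. For example,
for 'LETTER2' the sampling now amounts to:

    import random
    # five distinct costs in [0.01, 9.99] instead of five integers in [1, 9]:
    costs = [c * 0.01 for c in random.sample(range(1, 1000), 5)]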
--- gklearn/preimage/median_preimage_generator.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py index 6d3a45f..0449da4 100644 --- a/gklearn/preimage/median_preimage_generator.py +++ b/gklearn/preimage/median_preimage_generator.py @@ -191,20 +191,22 @@ class MedianPreimageGenerator(PreimageGenerator): """ if self.__fit_method == 'random': # random if self.__ged_options['edit_cost'] == 'LETTER': - self.__edit_cost_constants = random.sample(range(1, 10), 3) - self.__edit_cost_constants = [item * 0.1 for item in self.__edit_cost_constants] + self.__edit_cost_constants = random.sample(range(1, 1000), 3) + self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants] elif self.__ged_options['edit_cost'] == 'LETTER2': random.seed(time.time()) - self.__edit_cost_constants = random.sample(range(1, 10), 5) - # self.__edit_cost_constants = [item * 0.1 for item in self.__edit_cost_constants] + self.__edit_cost_constants = random.sample(range(1, 1000), 5) + self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': - self.__edit_cost_constants = random.sample(range(1, 10), 6) + self.__edit_cost_constants = random.sample(range(1, 1000), 6) + self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] if self._dataset.node_attrs == []: self.__edit_cost_constants[2] = 0 if self._dataset.edge_attrs == []: self.__edit_cost_constants[5] = 0 else: - self.__edit_cost_constants = random.sample(range(1, 10), 6) + self.__edit_cost_constants = random.sample(range(1, 1000), 6) + self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] if self._verbose >= 2: print('edit cost constants used:', self.__edit_cost_constants) elif self.__fit_method == 'expert': # expert From 4d47f61d0bf16de6d87dc05e4291b637f590050a Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Sun, 7 Jun 2020 11:24:10 +0200 Subject: [PATCH 11/17] Fix bugs to use unnormalized Gram matrix in MPG. --- gklearn/preimage/median_preimage_generator.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py index 0449da4..7fe0626 100644 --- a/gklearn/preimage/median_preimage_generator.py +++ b/gklearn/preimage/median_preimage_generator.py @@ -908,10 +908,12 @@ class MedianPreimageGenerator(PreimageGenerator): # compute distance in kernel space for set median. 
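+		# compute_k_dis below evaluates the distance in kernel space,
+		#   d(g, sum_i alpha_i g_i) = sqrt(k(g, g) - 2 * sum_i alpha_i * k(g, g_i)
+		#                                  + sum_{i,j} alpha_i * alpha_j * k(g_i, g_j)),
+		# here with uniform weights alpha_i = 1 / N over the N dataset graphs.
+		# With a normalized kernel, k(g, g) = 1, which is why kernel_sm and
+		# kernel_gm are set to 1 rather than to the raw self-kernel values.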
kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options) kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options) - kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize + if self._kernel_options['normalize']: + kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize + kernel_sm = 1 # @todo: not correct kernel value gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) - gram_with_sm = np.concatenate((np.array([[1] + kernels_to_sm]).T, gram_with_sm), axis=1) + gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1) self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_sm, withterm3=False) @@ -919,9 +921,11 @@ class MedianPreimageGenerator(PreimageGenerator): # compute distance in kernel space for generalized median. kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options) - kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize + if self._kernel_options['normalize']: + kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize + kernel_gm = 1 gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) - gram_with_gm = np.concatenate((np.array([[1] + kernels_to_gm]).T, gram_with_gm), axis=1) + gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1) self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_gm, withterm3=False) From fcff223150115a00c8f0b2826f4c56cb425fd63b Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Sun, 7 Jun 2020 11:38:20 +0200 Subject: [PATCH 12/17] Fix bugs to use unnormalized Gram matrix in RPG. --- gklearn/preimage/random_preimage_generator.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gklearn/preimage/random_preimage_generator.py b/gklearn/preimage/random_preimage_generator.py index 5ac9353..bdf9fe6 100644 --- a/gklearn/preimage/random_preimage_generator.py +++ b/gklearn/preimage/random_preimage_generator.py @@ -20,6 +20,7 @@ from gklearn.utils import Timer from gklearn.utils.utils import get_graph_kernel_by_name # from gklearn.utils.dataset import Dataset + class RandomPreimageGenerator(PreimageGenerator): def __init__(self, dataset=None): @@ -337,10 +338,12 @@ class RandomPreimageGenerator(PreimageGenerator): # compute new distances. 
kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options) kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) - kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize + if self._kernel_options['normalize']: + kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize + kernel_gtmp = 1 # @todo: not correct kernel value gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) - gram_with_gtmp = np.concatenate((np.array([[1] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) + gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True) return gtemp, dnew From 3eee30e19ea1631c0752bd6f5c724623eb5604f1 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Wed, 10 Jun 2020 12:25:12 +0200 Subject: [PATCH 13/17] Add examples. --- gklearn/examples/__init__.py | 0 .../examples/example_median_preimege_generator.py | 115 +++++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 gklearn/examples/__init__.py create mode 100644 gklearn/examples/example_median_preimege_generator.py diff --git a/gklearn/examples/__init__.py b/gklearn/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gklearn/examples/example_median_preimege_generator.py b/gklearn/examples/example_median_preimege_generator.py new file mode 100644 index 0000000..9afc7bd --- /dev/null +++ b/gklearn/examples/example_median_preimege_generator.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +"""example_median_preimege_generator.ipynb + +Automatically generated by Colaboratory. + +Original file is located at + https://colab.research.google.com/drive/1PIDvHOcmiLEQ5Np3bgBDdu0kLOquOMQK + +**This script demonstrates how to generate a graph preimage using Boria's method.** +--- +""" + +"""**1. Get dataset.**""" + +from gklearn.utils import Dataset, split_dataset_by_target + +# Predefined dataset name, use dataset "MAO". +ds_name = 'MAO' +# The node/edge labels that will not be used in the computation. +irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} + +# Initialize a Dataset. +dataset_all = Dataset() +# Load predefined dataset "MAO". +dataset_all.load_predefined_dataset(ds_name) +# Remove irrelevant labels. +dataset_all.remove_labels(**irrelevant_labels) +# Split the whole dataset according to the classification targets. +datasets = split_dataset_by_target(dataset_all) +# Get the first class of graphs, whose median preimage will be computed. +dataset = datasets[0] +len(dataset.graphs) + +"""**2. Set parameters.**""" + +import multiprocessing + +# Parameters for MedianPreimageGenerator (our method). +mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting. + 'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs. + 'ds_name': ds_name, # name of the dataset. + 'parallel': True, # whether the parallel scheme is to be used. + 'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit. + 'max_itrs': 100, # maximum iteration limit to optimize edit costs. If set to 0 then no limit. 
+               'max_itrs_without_update': 3, # If the edit costs are not updated for more than this number of iterations, the optimization stops.
+               'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
+               'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
+               'verbose': 2 # whether to print out results.
+               }
+# Parameters for graph kernel computation.
+kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
+                  'depth': 9,
+                  'k_func': 'MinMax',
+                  'compute_method': 'trie',
+                  'parallel': 'imap_unordered', # or None
+                  'n_jobs': multiprocessing.cpu_count(),
+                  'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
+                  'verbose': 2 # whether to print out results.
+                  }
+# Parameters for GED computation.
+ged_options = {'method': 'IPFP', # use the IPFP heuristic.
+               'initialization_method': 'RANDOM', # or 'NODE', etc.
+               'initial_solutions': 10, # when bigger than 1, the method is considered mIPFP.
+               'edit_cost': 'CONSTANT', # use CONSTANT cost.
+               'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed by the Euclidean distance.
+               'ratio_runs_from_initial_solutions': 1,
+               'threads': multiprocessing.cpu_count(), # parallel threads. Has no effect if mpg_options['parallel'] = False.
+               'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
+               }
+# Parameters for MedianGraphEstimator (Boria's method).
+mge_options = {'init_type': 'MEDOID', # how to initialize the median (compute the set-median). "MEDOID" uses the graph with the smallest SOD.
+               'random_inits': 10, # number of random initializations when 'init_type' = 'RANDOM'.
+               'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
+               'verbose': 2, # whether to print out results.
+               'refine': False # whether to refine the final SODs or not.
+               }
+print('done.')
+
+"""**3. Run median preimage generator.**"""
+
+from gklearn.preimage import MedianPreimageGenerator
+
+# Create median preimage generator instance.
+mpg = MedianPreimageGenerator()
+# Add dataset.
+mpg.dataset = dataset
+# Set parameters.
+mpg.set_options(**mpg_options.copy())
+mpg.kernel_options = kernel_options.copy()
+mpg.ged_options = ged_options.copy()
+mpg.mge_options = mge_options.copy()
+# Run.
+mpg.run()
+
+"""**4. Get results.**"""
+
+# Get results.
+import pprint
+pp = pprint.PrettyPrinter(indent=4) # pretty print
+results = mpg.get_results()
+pp.pprint(results)
+
+# Draw generated graphs.
+def draw_graph(graph):
+	import matplotlib.pyplot as plt
+	import networkx as nx
+	plt.figure()
+	pos = nx.spring_layout(graph)
+	nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
+	plt.show()
+	plt.clf()
+	plt.close()
+
+draw_graph(mpg.set_median)
+draw_graph(mpg.gen_median)
\ No newline at end of file

From 91eb4850f7b339b9559fff00ea15ca28527bef3e Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Fri, 12 Jun 2020 12:03:20 +0200
Subject: [PATCH 14/17] Fix typos in printout messages.
--- gklearn/preimage/median_preimage_generator.py | 2 +- gklearn/preimage/random_preimage_generator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py index 7fe0626..657ee4e 100644 --- a/gklearn/preimage/median_preimage_generator.py +++ b/gklearn/preimage/median_preimage_generator.py @@ -145,7 +145,7 @@ class MedianPreimageGenerator(PreimageGenerator): if self._verbose: print() print('================================================================================') - print('Finished generalization of preimages.') + print('Finished generation of preimages.') print('--------------------------------------------------------------------------------') print('The optimized edit cost constants:', self.__edit_cost_constants) print('SOD of the set median:', self.__sod_set_median) diff --git a/gklearn/preimage/random_preimage_generator.py b/gklearn/preimage/random_preimage_generator.py index bdf9fe6..cb28519 100644 --- a/gklearn/preimage/random_preimage_generator.py +++ b/gklearn/preimage/random_preimage_generator.py @@ -210,7 +210,7 @@ class RandomPreimageGenerator(PreimageGenerator): if self._verbose: print() print('=============================================================================') - print('Finished generalization of preimages.') + print('Finished generation of preimages.') print('-----------------------------------------------------------------------------') print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset) print('Distance in kernel space for the preimage:', self.__k_dis_preimage) From 95dceaa1ca39586bc1c0f0df55b2e9d28907fef0 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 12 Jun 2020 12:04:13 +0200 Subject: [PATCH 15/17] Add simple preimage experiments. --- gklearn/preimage/experiments/xp_simple_preimage.py | 176 +++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 gklearn/preimage/experiments/xp_simple_preimage.py diff --git a/gklearn/preimage/experiments/xp_simple_preimage.py b/gklearn/preimage/experiments/xp_simple_preimage.py new file mode 100644 index 0000000..a8ce79e --- /dev/null +++ b/gklearn/preimage/experiments/xp_simple_preimage.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 12 10:30:17 2020 + +@author: ljia + +This script constructs simple preimages to test preimage methods and find bugs and shortcomings in them. +""" + + +def xp_simple_preimage(): + import numpy as np + + """**1. Get dataset.**""" + + from gklearn.utils import Dataset, split_dataset_by_target + + # Predefined dataset name, use dataset "MAO". + ds_name = 'MAO' + # The node/edge labels that will not be used in the computation. + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} + + # Initialize a Dataset. + dataset_all = Dataset() + # Load predefined dataset "MAO". + dataset_all.load_predefined_dataset(ds_name) + # Remove irrelevant labels. + dataset_all.remove_labels(**irrelevant_labels) + # Split the whole dataset according to the classification targets. + datasets = split_dataset_by_target(dataset_all) + # Get the first class of graphs, whose median preimage will be computed. + dataset = datasets[0] + len(dataset.graphs) + + """**2. Set parameters.**""" + + import multiprocessing + + # Parameters for MedianPreimageGenerator (our method). + mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. 
"k-graphs" means use all graphs in median set when fitting. + 'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs. + 'ds_name': ds_name, # name of the dataset. + 'parallel': True, # whether the parallel scheme is to be used. + 'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit. + 'max_itrs': 10, # maximum iteration limit to optimize edit costs. If set to 0 then no limit. + 'max_itrs_without_update': 3, # If the times that edit costs is not update is more than this number, then the optimization stops. + 'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number. + 'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number. + 'verbose': 2 # whether to print out results. + } + # Parameters for graph kernel computation. + kernel_options = {'name': 'PathUpToH', # use path kernel up to length h. + 'depth': 9, + 'k_func': 'MinMax', + 'compute_method': 'trie', + 'parallel': 'imap_unordered', # or None + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, # whether to use normalized Gram matrix to optimize edit costs. + 'verbose': 2 # whether to print out results. + } + # Parameters for GED computation. + ged_options = {'method': 'IPFP', # use IPFP huristic. + 'initialization_method': 'RANDOM', # or 'NODE', etc. + 'initial_solutions': 10, # when bigger than 1, then the method is considered mIPFP. + 'edit_cost': 'CONSTANT', # use CONSTANT cost. + 'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed by euclidean distance. + 'ratio_runs_from_initial_solutions': 1, + 'threads': multiprocessing.cpu_count(), # parallel threads. Do not work if mpg_options['parallel'] = False. + 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES' + } + # Parameters for MedianGraphEstimator (Boria's method). + mge_options = {'init_type': 'MEDOID', # how to initial median (compute set-median). "MEDOID" is to use the graph with smallest SOD. + 'random_inits': 10, # number of random initialization when 'init_type' = 'RANDOM'. + 'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit. + 'verbose': 2, # whether to print out results. + 'refine': False # whether to refine the final SODs or not. + } + print('done.') + + """**3. Compute the Gram matrix and distance matrix.**""" + + from gklearn.utils.utils import get_graph_kernel_by_name + + # Get a graph kernel instance. + graph_kernel = get_graph_kernel_by_name(kernel_options['name'], + node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, + node_attrs=dataset.node_attrs, edge_attrs=dataset.edge_attrs, + ds_infos=dataset.get_dataset_infos(keys=['directed']), + kernel_options=kernel_options) + # Compute Gram matrix. + gram_matrix, run_time = graph_kernel.compute(dataset.graphs, **kernel_options) + + # Compute distance matrix. + from gklearn.utils import compute_distance_matrix + dis_mat, _, _, _ = compute_distance_matrix(gram_matrix) + + print('done.') + + """**4. Find the candidate graph.**""" + + from gklearn.preimage.utils import compute_k_dis + + # Number of the nearest neighbors. + k_neighbors = 10 + + # For each graph G in dataset, compute the distance between its image \Phi(G) and the mean of its neighbors' images. + dis_min = np.inf # the minimum distance between possible \Phi(G) and the mean of its neighbors. + for idx, G in enumerate(dataset.graphs): + # Find the k nearest neighbors of G. 
+		dis_list = dis_mat[idx] # distances between \Phi(G) and the image of each graph.
+		idx_sort = np.argsort(dis_list) # sort distances and get the sorted indices.
+		idx_nearest = idx_sort[1:k_neighbors+1] # indices of the k nearest neighbors.
+		dis_k_nearest = [dis_list[i] for i in idx_nearest] # the k smallest distances, excluding the 0 distance from G to itself.
+		G_k_nearest = [dataset.graphs[i] for i in idx_nearest] # the k nearest neighbors.
+
+		# Compute the distance between \Phi(G) and the mean of its neighbors.
+		dis_tmp = compute_k_dis(idx, # the index of G in the Gram matrix.
+					idx_nearest, # the indices of the neighbors.
+					[1 / k_neighbors] * k_neighbors, # coefficients for neighbors.
+					gram_matrix,
+					withterm3=False)
+		# Check if the new distance is smaller.
+		if dis_tmp < dis_min:
+			dis_min = dis_tmp
+			G_cand = G
+			G_neighbors = G_k_nearest
+
+	print('The minimum distance is', dis_min)
+
+	"""**5. Run median preimage generator.**"""
+
+	from gklearn.preimage import MedianPreimageGenerator
+
+	# Set the dataset to the k nearest neighbors.
+	dataset.load_graphs(G_neighbors)
+
+	# Create median preimage generator instance.
+	mpg = MedianPreimageGenerator()
+	# Add dataset.
+	mpg.dataset = dataset
+	# Set parameters.
+	mpg.set_options(**mpg_options.copy())
+	mpg.kernel_options = kernel_options.copy()
+	mpg.ged_options = ged_options.copy()
+	mpg.mge_options = mge_options.copy()
+	# Run.
+	mpg.run()
+
+	"""**6. Get results.**"""
+
+	# Get results.
+	import pprint
+	pp = pprint.PrettyPrinter(indent=4) # pretty print
+	results = mpg.get_results()
+	pp.pprint(results)
+
+	draw_graph(mpg.set_median)
+	draw_graph(mpg.gen_median)
+	draw_graph(G_cand)
+
+
+# Draw generated graphs.
+def draw_graph(graph):
+	import matplotlib.pyplot as plt
+	import networkx as nx
+	plt.figure()
+	pos = nx.spring_layout(graph)
+	nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
+	plt.show()
+	plt.clf()
+	plt.close()
+
+
+if __name__ == '__main__':
+	xp_simple_preimage()
\ No newline at end of file

From 74ce199958721d76d4a9d0aa4ec77215448e7f89 Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Mon, 22 Jun 2020 15:29:09 +0200
Subject: [PATCH 16/17] Add examples for graph kernels.
---
 .../examples/compute_distance_in_kernel_space.py  | 73 +++++++++++
 gklearn/examples/compute_graph_kernel.py | 87 ++++++++++++++
 .../examples/example_median_preimege_generator.py | 115 ---------------------
 3 files changed, 160 insertions(+), 115 deletions(-)
 create mode 100644 gklearn/examples/compute_distance_in_kernel_space.py
 create mode 100644 gklearn/examples/compute_graph_kernel.py
 delete mode 100644 gklearn/examples/example_median_preimege_generator.py
diff --git a/gklearn/examples/compute_distance_in_kernel_space.py b/gklearn/examples/compute_distance_in_kernel_space.py
new file mode 100644
index 0000000..76c7494
--- /dev/null
+++ b/gklearn/examples/compute_distance_in_kernel_space.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+"""compute_distance_in_kernel_space.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/17tZP6IrineQmzo9sRtfZOnHpHx6HnlMA
+
+**This script demonstrates how to compute the distance in kernel space between the image of a graph and the mean of images of a group of graphs.**
+---
+
+**0. Install `graphkit-learn`.**
+"""
+
+"""**1. Get dataset.**"""
+
+from gklearn.utils import Dataset
+
+# Predefined dataset name, use dataset "MUTAG".
+ds_name = 'MUTAG'
+
+# Initialize a Dataset.
+dataset = Dataset()
+# Load predefined dataset "MUTAG".
+dataset.load_predefined_dataset(ds_name)
+len(dataset.graphs)
+
+"""**2. Compute graph kernel.**"""
+
+from gklearn.kernels import PathUpToH
+import multiprocessing
+
+# Initialize parameters for graph kernel computation.
+kernel_options = {'depth': 3,
+				  'k_func': 'MinMax',
+				  'compute_method': 'trie'
+				  }
+
+# Initialize graph kernel.
+graph_kernel = PathUpToH(node_labels=dataset.node_labels, # list of node label names.
+						 edge_labels=dataset.edge_labels, # list of edge label names.
+						 ds_infos=dataset.get_dataset_infos(keys=['directed']), # dataset information required for computation.
+						 **kernel_options, # options for computation.
+						 )
+
+# Compute Gram matrix.
+gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
+											 parallel='imap_unordered', # or None.
+											 n_jobs=multiprocessing.cpu_count(), # number of parallel jobs.
+											 normalize=True, # whether to return normalized Gram matrix.
+											 verbose=2 # whether to print out results.
+											 )
+
+"""**3. Compute distance in kernel space.**
+
+Given a dataset $\mathcal{G}_N$, compute the distance in kernel space between the image of $G_1 \in \mathcal{G}_N$ and the mean of images of $\mathcal{G}_k \subset \mathcal{G}_N$.
+"""
+
+from gklearn.preimage.utils import compute_k_dis

+# Index of $G_1$.
+idx_1 = 10
+# Indices of graphs in $\mathcal{G}_k$.
+idx_graphs = range(0, 10)
+
+# Compute the distance in kernel space.
+dis_k = compute_k_dis(idx_1,
+					  idx_graphs,
+					  [1 / len(idx_graphs)] * len(idx_graphs), # weights for images of graphs in $\mathcal{G}_k$; all equal when computing the mean.
+					  gram_matrix, # Gram matrix of all graphs.
+					  withterm3=False
+					  )
+print(dis_k)
\ No newline at end of file
diff --git a/gklearn/examples/compute_graph_kernel.py b/gklearn/examples/compute_graph_kernel.py
new file mode 100644
index 0000000..2fe8d52
--- /dev/null
+++ b/gklearn/examples/compute_graph_kernel.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+"""compute_graph_kernel.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/17Q2QCl9CAtDweGF8LiWnWoN2laeJqT0u
+
+**This script demonstrates how to compute a graph kernel.**
+---
+
+**0. Install `graphkit-learn`.**
+"""
+
+"""**1. Get dataset.**"""
+
+from gklearn.utils import Dataset
+
+# Predefined dataset name, use dataset "MUTAG".
+ds_name = 'MUTAG'
+
+# Initialize a Dataset.
+dataset = Dataset()
+# Load predefined dataset "MUTAG".
+dataset.load_predefined_dataset(ds_name)
+len(dataset.graphs)
+
+"""**2. Compute graph kernel.**"""
+
+from gklearn.kernels import PathUpToH
+
+# Initialize parameters for graph kernel computation.
+kernel_options = {'depth': 3,
+				  'k_func': 'MinMax',
+				  'compute_method': 'trie'
+				  }
+
+# Initialize graph kernel.
+graph_kernel = PathUpToH(node_labels=dataset.node_labels, # list of node label names.
+						 edge_labels=dataset.edge_labels, # list of edge label names.
+						 ds_infos=dataset.get_dataset_infos(keys=['directed']), # dataset information required for computation.
+						 **kernel_options, # options for computation.
+						 )
+
+print('done.')
+
+import multiprocessing
+import matplotlib.pyplot as plt
+
+# Compute Gram matrix.
+gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
+											 parallel='imap_unordered', # or None.
+											 n_jobs=multiprocessing.cpu_count(), # number of parallel jobs.
+											 normalize=True, # whether to return normalized Gram matrix.
+											 verbose=2 # whether to print out results.
+											 )
+# Print results.
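+# (With normalize=True the Gram matrix has a unit diagonal; the plt.imshow
+# call below gives a quick visual sanity check.)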
+print()
+print(gram_matrix)
+print(run_time)
+plt.imshow(gram_matrix)
+
+import multiprocessing
+
+# Compute graph kernels between a graph and a list of graphs.
+kernel_list, run_time = graph_kernel.compute(dataset.graphs, # a list of graphs.
+											 dataset.graphs[0], # a single graph.
+											 parallel='imap_unordered', # or None.
+											 n_jobs=multiprocessing.cpu_count(), # number of parallel jobs.
+											 verbose=2 # whether to print out results.
+											 )
+# Print results.
+print()
+print(kernel_list)
+print(run_time)
+
+import multiprocessing
+
+# Compute a graph kernel between two graphs.
+kernel, run_time = graph_kernel.compute(dataset.graphs[0], # a single graph.
+										dataset.graphs[1], # another single graph.
+										verbose=2 # whether to print out results.
+										)
+# Print results.
+print()
+print(kernel)
+print(run_time)
\ No newline at end of file
diff --git a/gklearn/examples/example_median_preimege_generator.py b/gklearn/examples/example_median_preimege_generator.py
deleted file mode 100644
index 9afc7bd..0000000
--- a/gklearn/examples/example_median_preimege_generator.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# -*- coding: utf-8 -*-
-"""example_median_preimege_generator.ipynb
-
-Automatically generated by Colaboratory.
-
-Original file is located at
-    https://colab.research.google.com/drive/1PIDvHOcmiLEQ5Np3bgBDdu0kLOquOMQK
-
-**This script demonstrates how to generate a graph preimage using Boria's method.**
----
-"""
-
-"""**1. Get dataset.**"""
-
-from gklearn.utils import Dataset, split_dataset_by_target
-
-# Predefined dataset name, use dataset "MAO".
-ds_name = 'MAO'
-# The node/edge labels that will not be used in the computation.
-irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
-
-# Initialize a Dataset.
-dataset_all = Dataset()
-# Load predefined dataset "MAO".
-dataset_all.load_predefined_dataset(ds_name)
-# Remove irrelevant labels.
-dataset_all.remove_labels(**irrelevant_labels)
-# Split the whole dataset according to the classification targets.
-datasets = split_dataset_by_target(dataset_all)
-# Get the first class of graphs, whose median preimage will be computed.
-dataset = datasets[0]
-len(dataset.graphs)
-
-"""**2. Set parameters.**"""
-
-import multiprocessing
-
-# Parameters for MedianPreimageGenerator (our method).
-mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting.
-               'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs.
-               'ds_name': ds_name, # name of the dataset.
-               'parallel': True, # whether the parallel scheme is to be used.
-               'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit.
-               'max_itrs': 100, # maximum iteration limit to optimize edit costs. If set to 0 then no limit.
-               'max_itrs_without_update': 3, # If the edit costs are not updated for more than this number of iterations, the optimization stops.
-               'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
-               'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
-               'verbose': 2 # whether to print out results.
-               }
-# Parameters for graph kernel computation.
-kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
-                  'depth': 9,
-                  'k_func': 'MinMax',
-                  'compute_method': 'trie',
-                  'parallel': 'imap_unordered', # or None
-                  'n_jobs': multiprocessing.cpu_count(),
-                  'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
-                  'verbose': 2 # whether to print out results.
-                  }
-# Parameters for GED computation.
-ged_options = {'method': 'IPFP', # use the IPFP heuristic.
-               'initialization_method': 'RANDOM', # or 'NODE', etc.
-               'initial_solutions': 10, # when bigger than 1, the method is considered mIPFP.
-               'edit_cost': 'CONSTANT', # use CONSTANT cost.
-               'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed by the Euclidean distance.
-               'ratio_runs_from_initial_solutions': 1,
-               'threads': multiprocessing.cpu_count(), # parallel threads. Has no effect if mpg_options['parallel'] = False.
-               'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
-               }
-# Parameters for MedianGraphEstimator (Boria's method).
-mge_options = {'init_type': 'MEDOID', # how to initialize the median (compute the set-median). "MEDOID" uses the graph with the smallest SOD.
-               'random_inits': 10, # number of random initializations when 'init_type' = 'RANDOM'.
-               'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
-               'verbose': 2, # whether to print out results.
-               'refine': False # whether to refine the final SODs or not.
-               }
-print('done.')
-
-"""**3. Run median preimage generator.**"""
-
-from gklearn.preimage import MedianPreimageGenerator
-
-# Create median preimage generator instance.
-mpg = MedianPreimageGenerator()
-# Add dataset.
-mpg.dataset = dataset
-# Set parameters.
-mpg.set_options(**mpg_options.copy())
-mpg.kernel_options = kernel_options.copy()
-mpg.ged_options = ged_options.copy()
-mpg.mge_options = mge_options.copy()
-# Run.
-mpg.run()
-
-"""**4. Get results.**"""
-
-# Get results.
-import pprint
-pp = pprint.PrettyPrinter(indent=4) # pretty print
-results = mpg.get_results()
-pp.pprint(results)
-
-# Draw generated graphs.
-def draw_graph(graph):
-	import matplotlib.pyplot as plt
-	import networkx as nx
-	plt.figure()
-	pos = nx.spring_layout(graph)
-	nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
-	plt.show()
-	plt.clf()
-	plt.close()
-
-draw_graph(mpg.set_median)
-draw_graph(mpg.gen_median)
\ No newline at end of file

From f74ca2c869a4ec818dffbf6a528b3fdec968d318 Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Mon, 22 Jun 2020 15:32:05 +0200
Subject: [PATCH 17/17] Add examples.
---
 gklearn/examples/median_preimege_generator.py | 115 ++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 gklearn/examples/median_preimege_generator.py
diff --git a/gklearn/examples/median_preimege_generator.py b/gklearn/examples/median_preimege_generator.py
new file mode 100644
index 0000000..9afc7bd
--- /dev/null
+++ b/gklearn/examples/median_preimege_generator.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+"""example_median_preimege_generator.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1PIDvHOcmiLEQ5Np3bgBDdu0kLOquOMQK
+
+**This script demonstrates how to generate a graph preimage using Boria's method.**
+---
+"""
+
+"""**1. Get dataset.**"""
+
+from gklearn.utils import Dataset, split_dataset_by_target
+
+# Predefined dataset name, use dataset "MAO".
+ds_name = 'MAO'
+# The node/edge labels that will not be used in the computation.
+irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
+
+# Initialize a Dataset.
+dataset_all = Dataset()
+# Load predefined dataset "MAO".
+dataset_all.load_predefined_dataset(ds_name)
+# Remove irrelevant labels.
+dataset_all.remove_labels(**irrelevant_labels)
+# Split the whole dataset according to the classification targets.
+datasets = split_dataset_by_target(dataset_all)
+# Get the first class of graphs, whose median preimage will be computed.
+dataset = datasets[0]
+len(dataset.graphs)
+
+"""**2. Set parameters.**"""
+
+import multiprocessing
+
+# Parameters for MedianPreimageGenerator (our method).
+mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting.
+               'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs.
+               'ds_name': ds_name, # name of the dataset.
+               'parallel': True, # whether the parallel scheme is to be used.
+               'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit.
+               'max_itrs': 100, # maximum iteration limit to optimize edit costs. If set to 0 then no limit.
+               'max_itrs_without_update': 3, # If the edit costs are not updated for more than this number of iterations, the optimization stops.
+               'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
+               'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
+               'verbose': 2 # whether to print out results.
+               }
+# Parameters for graph kernel computation.
+kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
+                  'depth': 9,
+                  'k_func': 'MinMax',
+                  'compute_method': 'trie',
+                  'parallel': 'imap_unordered', # or None
+                  'n_jobs': multiprocessing.cpu_count(),
+                  'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
+                  'verbose': 2 # whether to print out results.
+                  }
+# Parameters for GED computation.
+ged_options = {'method': 'IPFP', # use the IPFP heuristic.
+               'initialization_method': 'RANDOM', # or 'NODE', etc.
+               'initial_solutions': 10, # when bigger than 1, the method is considered mIPFP.
+               'edit_cost': 'CONSTANT', # use CONSTANT cost.
+               'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed by the Euclidean distance.
+               'ratio_runs_from_initial_solutions': 1,
+               'threads': multiprocessing.cpu_count(), # parallel threads. Has no effect if mpg_options['parallel'] = False.
+               'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
+               }
+# Parameters for MedianGraphEstimator (Boria's method).
+mge_options = {'init_type': 'MEDOID', # how to initialize the median (compute the set-median). "MEDOID" uses the graph with the smallest SOD.
+               'random_inits': 10, # number of random initializations when 'init_type' = 'RANDOM'.
+               'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
+               'verbose': 2, # whether to print out results.
+               'refine': False # whether to refine the final SODs or not.
+               }
+print('done.')
+
+"""**3. Run median preimage generator.**"""
+
+from gklearn.preimage import MedianPreimageGenerator
+
+# Create median preimage generator instance.
+mpg = MedianPreimageGenerator()
+# Add dataset.
+mpg.dataset = dataset
+# Set parameters.
+mpg.set_options(**mpg_options.copy())
+mpg.kernel_options = kernel_options.copy()
+mpg.ged_options = ged_options.copy()
+mpg.mge_options = mge_options.copy()
+# Run.
+mpg.run()
+
+"""**4. Get results.**"""
+
+# Get results.
+import pprint
+pp = pprint.PrettyPrinter(indent=4) # pretty print
+results = mpg.get_results()
+pp.pprint(results)
+
+# Draw generated graphs.
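+# (draw_graph renders a graph with a spring layout, labelling each node by
+# its 'atom_symbol' attribute.)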
+def draw_graph(graph): + import matplotlib.pyplot as plt + import networkx as nx + plt.figure() + pos = nx.spring_layout(graph) + nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True) + plt.show() + plt.clf() + plt.close() + +draw_graph(mpg.set_median) +draw_graph(mpg.gen_median) \ No newline at end of file