diff --git a/gklearn/kernels/__init__.py b/gklearn/kernels/__init__.py
index 60fea24..4afcc67 100644
--- a/gklearn/kernels/__init__.py
+++ b/gklearn/kernels/__init__.py
@@ -11,3 +11,4 @@ from gklearn.kernels.graph_kernel import GraphKernel
 from gklearn.kernels.structural_sp import StructuralSP
 from gklearn.kernels.shortest_path import ShortestPath
 from gklearn.kernels.path_up_to_h import PathUpToH
+from gklearn.kernels.treelet import Treelet
diff --git a/gklearn/kernels/path_up_to_h.py b/gklearn/kernels/path_up_to_h.py
index d031021..b23687c 100644
--- a/gklearn/kernels/path_up_to_h.py
+++ b/gklearn/kernels/path_up_to_h.py
@@ -176,7 +176,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
             pool.close()
             pool.join()
 
-        # compute Gram matrix.
+        # compute kernel list.
         kernel_list = [None] * len(g_list)
 
         def init_worker(p1_toshare, plist_toshare):
diff --git a/gklearn/kernels/treelet.py b/gklearn/kernels/treelet.py
new file mode 100644
index 0000000..134c683
--- /dev/null
+++ b/gklearn/kernels/treelet.py
@@ -0,0 +1,505 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr 13 18:02:46 2020
+
+@author: ljia
+
+@references:
+
+    [1] Gaüzère B, Brun L, Villemin D. Two new graphs kernels in
+    chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47.
+"""
+
+import sys
+from multiprocessing import Pool
+from tqdm import tqdm
+import numpy as np
+import networkx as nx
+from collections import Counter
+from itertools import chain
+from gklearn.utils.parallel import parallel_gm, parallel_me
+from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs
+from gklearn.kernels import GraphKernel
+
+
+class Treelet(GraphKernel):
+
+    def __init__(self, **kwargs):
+        GraphKernel.__init__(self)
+        self.__node_labels = kwargs.get('node_labels', [])
+        self.__edge_labels = kwargs.get('edge_labels', [])
+        self.__sub_kernel = kwargs.get('sub_kernel', None)
+        self.__ds_infos = kwargs.get('ds_infos', {})
+        if self.__sub_kernel is None:
+            raise Exception('Sub kernel not set.')
+
+
+    def _compute_gm_series(self):
+        self.__add_dummy_labels(self._graphs)
+
+        # get all canonical keys of all graphs before computing kernels to save
+        # time, but this may cost a lot of memory for large datasets.
+        canonkeys = []
+        if self._verbose >= 2:
+            iterator = tqdm(self._graphs, desc='getting canonkeys', file=sys.stdout)
+        else:
+            iterator = self._graphs
+        for g in iterator:
+            canonkeys.append(self.__get_canonkeys(g))
+
+        # compute Gram matrix.
+        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+        from itertools import combinations_with_replacement
+        itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
+        if self._verbose >= 2:
+            iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
+        else:
+            iterator = itr
+        for i, j in iterator:
+            kernel = self.__kernel_do(canonkeys[i], canonkeys[j])
+            gram_matrix[i][j] = kernel
+            gram_matrix[j][i] = kernel # @todo: no directed graph considered?
+
+        return gram_matrix
+
+
+    def _compute_gm_imap_unordered(self):
+        self.__add_dummy_labels(self._graphs)
+
+        # get all canonical keys of all graphs before computing kernels to save
+        # time, but this may cost a lot of memory for large datasets.
+        pool = Pool(self._n_jobs)
+        itr = zip(self._graphs, range(0, len(self._graphs)))
+        if len(self._graphs) < 100 * self._n_jobs:
+            chunksize = int(len(self._graphs) / self._n_jobs) + 1
+        else:
+            chunksize = 100
+        canonkeys = [[] for _ in range(len(self._graphs))]
+        get_fun = self._wrapper_get_canonkeys
+        if self._verbose >= 2:
+            iterator = tqdm(pool.imap_unordered(get_fun, itr, chunksize),
+                            desc='getting canonkeys', file=sys.stdout)
+        else:
+            iterator = pool.imap_unordered(get_fun, itr, chunksize)
+        for i, ck in iterator:
+            canonkeys[i] = ck
+        pool.close()
+        pool.join()
+
+        # compute Gram matrix.
+        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+        def init_worker(canonkeys_toshare):
+            global G_canonkeys
+            G_canonkeys = canonkeys_toshare
+        do_fun = self._wrapper_kernel_do
+        parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
+                    glbv=(canonkeys,), n_jobs=self._n_jobs, verbose=self._verbose)
+
+        return gram_matrix
+
+
+    def _compute_kernel_list_series(self, g1, g_list):
+        self.__add_dummy_labels(g_list + [g1])
+
+        # get all canonical keys of all graphs before computing kernels to save
+        # time, but this may cost a lot of memory for large datasets.
+        canonkeys_1 = self.__get_canonkeys(g1)
+        canonkeys_list = []
+        if self._verbose >= 2:
+            iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout)
+        else:
+            iterator = g_list
+        for g in iterator:
+            canonkeys_list.append(self.__get_canonkeys(g))
+
+        # compute kernel list.
+        kernel_list = [None] * len(g_list)
+        if self._verbose >= 2:
+            iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
+        else:
+            iterator = range(len(g_list))
+        for i in iterator:
+            kernel = self.__kernel_do(canonkeys_1, canonkeys_list[i])
+            kernel_list[i] = kernel
+
+        return kernel_list
+
+
+    def _compute_kernel_list_imap_unordered(self, g1, g_list):
+        self.__add_dummy_labels(g_list + [g1])
+
+        # get all canonical keys of all graphs before computing kernels to save
+        # time, but this may cost a lot of memory for large datasets.
+        canonkeys_1 = self.__get_canonkeys(g1)
+        canonkeys_list = [[] for _ in range(len(g_list))]
+        pool = Pool(self._n_jobs)
+        itr = zip(g_list, range(0, len(g_list)))
+        if len(g_list) < 100 * self._n_jobs:
+            chunksize = int(len(g_list) / self._n_jobs) + 1
+        else:
+            chunksize = 100
+        get_fun = self._wrapper_get_canonkeys
+        if self._verbose >= 2:
+            iterator = tqdm(pool.imap_unordered(get_fun, itr, chunksize),
+                            desc='getting canonkeys', file=sys.stdout)
+        else:
+            iterator = pool.imap_unordered(get_fun, itr, chunksize)
+        for i, ck in iterator:
+            canonkeys_list[i] = ck
+        pool.close()
+        pool.join()
+
+        # compute kernel list.
+        kernel_list = [None] * len(g_list)
+
+        def init_worker(ck_1_toshare, ck_list_toshare):
+            global G_ck_1, G_ck_list
+            G_ck_1 = ck_1_toshare
+            G_ck_list = ck_list_toshare
+        do_fun = self._wrapper_kernel_list_do
+        def func_assign(result, var_to_assign):
+            var_to_assign[result[0]] = result[1]
+        itr = range(len(g_list))
+        len_itr = len(g_list)
+        parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
+                    init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered',
+                    n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
+
+        return kernel_list
+
+
+    def _wrapper_kernel_list_do(self, itr):
+        return itr, self.__kernel_do(G_ck_1, G_ck_list[itr])
+
+
+    def _compute_single_kernel_series(self, g1, g2):
+        self.__add_dummy_labels([g1] + [g2])
+        canonkeys_1 = self.__get_canonkeys(g1)
+        canonkeys_2 = self.__get_canonkeys(g2)
+        kernel = self.__kernel_do(canonkeys_1, canonkeys_2)
+        return kernel
+
+
+    def __kernel_do(self, canonkey1, canonkey2):
+        """Compute the treelet graph kernel between two graphs.
+
+        Parameters
+        ----------
+        canonkey1, canonkey2 : dict
+            Dictionaries mapping each canonical key (a string) found in a
+            graph to the number of treelets with that key.
+
+        Return
+        ------
+        kernel : float
+            The treelet kernel between the two graphs.
+        """
+        keys = set(canonkey1.keys()) & set(canonkey2.keys()) # canonical keys present in both graphs
+        vector1 = np.array([canonkey1[key] for key in keys])
+        vector2 = np.array([canonkey2[key] for key in keys])
+        kernel = self.__sub_kernel(vector1, vector2)
+        return kernel
+
+
+    def _wrapper_kernel_do(self, itr):
+        i = itr[0]
+        j = itr[1]
+        return i, j, self.__kernel_do(G_canonkeys[i], G_canonkeys[j])
+
+
+    def __get_canonkeys(self, G):
+        """Generate canonical keys of all treelets in a graph.
+
+        Parameters
+        ----------
+        G : NetworkX graph
+            The graph in which keys are generated.
+
+        Return
+        ------
+        canonkey/canonkey_l : dict
+            For unlabeled graphs, canonkey is a dictionary recording the number
+            of occurrences of each tree pattern. For labeled graphs, canonkey_l
+            keeps track of the number of occurrences of each treelet.
+        """
+        patterns = {} # a dictionary holding the list of patterns for each treelet type.
+        canonkey = {} # canonical key, a dictionary recording the number of occurrences of each tree pattern.
+
+        ### structural analysis ###
+        ### In this section, a list of patterns is generated for each treelet,
+        ### where every pattern is represented by nodes ordered by Morgan's
+        ### extended labeling.
+        # linear patterns
+        patterns['0'] = list(G.nodes())
+        canonkey['0'] = nx.number_of_nodes(G)
+        for i in range(1, 6): # paths of length 1 to 5
+            patterns[str(i)] = find_all_paths(G, i, self.__ds_infos['directed'])
+            canonkey[str(i)] = len(patterns[str(i)])
+
+        # n-star patterns
+        patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3]
+        patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4]
+        patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5]
+        # counts of n-star patterns
+        canonkey['6'] = len(patterns['3star'])
+        canonkey['8'] = len(patterns['4star'])
+        canonkey['d'] = len(patterns['5star'])
+
+        # pattern 7
+        patterns['7'] = [] # the 1st line of Table 1 in Ref [1]
+        for pattern in patterns['3star']:
+            for i in range(1, len(pattern)): # for each neighbor of node 0
+                if G.degree(pattern[i]) >= 2:
+                    pattern_t = pattern[:]
+                    # set the node with degree >= 2 as the 4th node
+                    pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]
+                    for neighborx in G[pattern[i]]:
+                        if neighborx != pattern[0]:
+                            new_pattern = pattern_t + [neighborx]
+                            patterns['7'].append(new_pattern)
+        canonkey['7'] = len(patterns['7'])
+
+        # pattern 11
+        patterns['11'] = [] # the 4th line of Table 1 in Ref [1]
+        for pattern in patterns['4star']:
+            for i in range(1, len(pattern)):
+                if G.degree(pattern[i]) >= 2:
+                    pattern_t = pattern[:]
+                    pattern_t[i], pattern_t[4] = pattern_t[4], pattern_t[i]
+                    for neighborx in G[pattern[i]]:
+                        if neighborx != pattern[0]:
+                            new_pattern = pattern_t + [neighborx]
+                            patterns['11'].append(new_pattern)
+        canonkey['b'] = len(patterns['11'])
+
+        # pattern 12
+        patterns['12'] = [] # the 5th line of Table 1 in Ref [1]
+        rootlist = [] # a list of root nodes whose extended labels are 3
+        for pattern in patterns['3star']:
+            if pattern[0] not in rootlist: # prevent counting the same pattern twice, once from each of its two root nodes
+                rootlist.append(pattern[0])
+                for i in range(1, len(pattern)):
+                    if G.degree(pattern[i]) >= 3:
+                        rootlist.append(pattern[i])
+                        pattern_t = pattern[:]
+                        pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]
+                        for neighborx1 in G[pattern[i]]:
+                            if neighborx1 != pattern[0]:
+                                for neighborx2 in G[pattern[i]]:
+                                    if neighborx1 > neighborx2 and neighborx2 != pattern[0]:
+                                        new_pattern = pattern_t + [neighborx1] + [neighborx2]
+                                        # new_patterns = [ pattern + [neighborx1] + [neighborx2] for neighborx1 in G[pattern[i]] if neighborx1 != pattern[0] for neighborx2 in G[pattern[i]] if (neighborx1 > neighborx2 and neighborx2 != pattern[0]) ]
+                                        patterns['12'].append(new_pattern)
+        canonkey['c'] = int(len(patterns['12']) / 2)
+
+        # pattern 9
+        patterns['9'] = [] # the 2nd line of Table 1 in Ref [1]
+        for pattern in patterns['3star']:
+            for pairs in [ [neighbor1, neighbor2] for neighbor1 in G[pattern[0]] if G.degree(neighbor1) >= 2 \
+                           for neighbor2 in G[pattern[0]] if G.degree(neighbor2) >= 2 if neighbor1 > neighbor2 ]:
+                pattern_t = pattern[:]
+                # move the nodes with extended label 4 to the positions that correspond to their children
+                pattern_t[pattern_t.index(pairs[0])], pattern_t[2] = pattern_t[2], pattern_t[pattern_t.index(pairs[0])]
+                pattern_t[pattern_t.index(pairs[1])], pattern_t[3] = pattern_t[3], pattern_t[pattern_t.index(pairs[1])]
+                for neighborx1 in G[pairs[0]]:
+                    if neighborx1 != pattern[0]:
+                        for neighborx2 in G[pairs[1]]:
+                            if neighborx2 != pattern[0]:
+                                new_pattern = pattern_t + [neighborx1] + [neighborx2]
+                                patterns['9'].append(new_pattern)
+        canonkey['9'] = len(patterns['9'])
+
+        # pattern 10
+        patterns['10'] = [] # the 3rd line of Table 1 in Ref [1]
+        for pattern in patterns['3star']:
+            for i in range(1, len(pattern)):
+                if G.degree(pattern[i]) >= 2:
+                    for neighborx in G[pattern[i]]:
+                        if neighborx != pattern[0] and G.degree(neighborx) >= 2:
+                            pattern_t = pattern[:]
+                            pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]
+                            new_patterns = [ pattern_t + [neighborx] + [neighborxx] for neighborxx in G[neighborx] if neighborxx != pattern[i] ]
+                            patterns['10'].extend(new_patterns)
+        canonkey['a'] = len(patterns['10'])
+
+        ### labeling information ###
+        ### In this section, a canonical key is generated for every pattern
+        ### obtained in the structural analysis section above; each key is a
+        ### string corresponding to a unique treelet. A dictionary is built to
+        ### keep track of the number of occurrences of each treelet.
+        if len(self.__node_labels) > 0 or len(self.__edge_labels) > 0:
+            canonkey_l = {} # canonical key, a dictionary keeping track of the number of occurrences of each treelet.
+
+            # linear patterns
+            canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.__node_labels))
+            for key in canonkey_t:
+                canonkey_l[('0', key)] = canonkey_t[key]
+
+            for i in range(1, 6): # paths of length 1 to 5
+                treelet = []
+                for pattern in patterns[str(i)]:
+                    canonlist = []
+                    for idx, node in enumerate(pattern[:-1]):
+                        canonlist.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
+                        canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.__edge_labels))
+                    canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.__node_labels))
+                    canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1]
+                    treelet.append(tuple([str(i)] + canonkey_t))
+                canonkey_l.update(Counter(treelet))
+
+            # n-star patterns
+            for i in range(3, 6):
+                treelet = []
+                for pattern in patterns[str(i) + 'star']:
+                    canonlist = []
+                    for leaf in pattern[1:]:
+                        nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                        elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                        canonlist.append(tuple((nlabels, elabels)))
+                    canonlist.sort()
+                    canonlist = list(chain.from_iterable(canonlist))
+                    canonkey_t = tuple(['d' if i == 5 else str(i * 2)] +
+                                       [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] +
+                                       canonlist)
+                    treelet.append(canonkey_t)
+                canonkey_l.update(Counter(treelet))
+
+            # pattern 7
+            treelet = []
+            for pattern in patterns['7']:
+                canonlist = []
+                for leaf in pattern[1:3]:
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    canonlist.append(tuple((nlabels, elabels)))
+                canonlist.sort()
+                canonlist = list(chain.from_iterable(canonlist))
+                canonkey_t = tuple(['7'] +
+                                   [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist +
+                                   [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] +
+                                   [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)] +
+                                   [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] +
+                                   [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)])
+                treelet.append(canonkey_t)
+            canonkey_l.update(Counter(treelet))
+
+            # pattern 11
+            treelet = []
+            for pattern in patterns['11']:
+                canonlist = []
+                for leaf in pattern[1:4]:
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    canonlist.append(tuple((nlabels, elabels)))
+                canonlist.sort()
+                canonlist = list(chain.from_iterable(canonlist))
+                canonkey_t = tuple(['b'] +
+                                   [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist +
+                                   [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] +
+                                   [tuple(G[pattern[4]][pattern[0]][el] for el in self.__edge_labels)] +
+                                   [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels)] +
+                                   [tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)])
+                treelet.append(canonkey_t)
+            canonkey_l.update(Counter(treelet))
+
+            # pattern 10
+            treelet = []
+            for pattern in patterns['10']:
+                canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
+                             tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)]
+                canonlist = []
+                for leaf in pattern[1:3]:
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    canonlist.append(tuple((nlabels, elabels)))
+                canonlist.sort()
+                canonkey0 = list(chain.from_iterable(canonlist))
+                canonkey_t = tuple(['a'] +
+                                   [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] +
+                                   [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] +
+                                   [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)] +
+                                   [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] +
+                                   [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)] +
+                                   canonkey4 + canonkey0)
+                treelet.append(canonkey_t)
+            canonkey_l.update(Counter(treelet))
+
+            # pattern 12
+            treelet = []
+            for pattern in patterns['12']:
+                canonlist0 = []
+                for leaf in pattern[1:3]:
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    canonlist0.append(tuple((nlabels, elabels)))
+                canonlist0.sort()
+                canonlist0 = list(chain.from_iterable(canonlist0))
+                canonlist3 = []
+                for leaf in pattern[4:6]:
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                    elabels = tuple(G[leaf][pattern[3]][el] for el in self.__edge_labels)
+                    canonlist3.append(tuple((nlabels, elabels)))
+                canonlist3.sort()
+                canonlist3 = list(chain.from_iterable(canonlist3))
+
+                # Two possible keys can be generated from the two nodes with
+                # extended label 3; select the one with the lower lexicographic order.
+                canonkey_t1 = tuple(['c'] +
+                                    [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist0 +
+                                    [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] +
+                                    [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)] +
+                                    canonlist3)
+                canonkey_t2 = tuple(['c'] +
+                                    [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] + canonlist3 +
+                                    [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] +
+                                    [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)] +
+                                    canonlist0)
+                treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2)
+            canonkey_l.update(Counter(treelet))
+
+            # pattern 9
+            treelet = []
+            for pattern in patterns['9']:
+                canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels),
+                             tuple(G[pattern[4]][pattern[2]][el] for el in self.__edge_labels)]
+                canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
+                             tuple(G[pattern[5]][pattern[3]][el] for el in self.__edge_labels)]
+                prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.__node_labels),
+                           tuple(G[pattern[2]][pattern[0]][el] for el in self.__edge_labels)]
+                prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels),
+                           tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+                if prekey2 + canonkey2 < prekey3 + canonkey3:
+                    canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
+                                 + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
+                                 + prekey2 + prekey3 + canonkey2 + canonkey3
+                else:
+                    canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
+                                 + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
+                                 + prekey3 + prekey2 + canonkey3 + canonkey2
+                treelet.append(tuple(['9'] +
+                                     [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] +
+                                     canonkey_t))
+            canonkey_l.update(Counter(treelet))
+
+            return canonkey_l
+
+        return canonkey
+
+
+    def _wrapper_get_canonkeys(self, itr_item):
+        g = itr_item[0]
+        i = itr_item[1]
+        return i, self.__get_canonkeys(g)
+
+
+    def __add_dummy_labels(self, Gn):
+        if len(self.__node_labels) == 0:
+            for G in Gn:
+                nx.set_node_attributes(G, '0', 'dummy')
+            self.__node_labels.append('dummy')
+        if len(self.__edge_labels) == 0:
+            for G in Gn:
+                nx.set_edge_attributes(G, '0', 'dummy')
+            self.__edge_labels.append('dummy')
\ No newline at end of file
diff --git a/gklearn/preimage/experiments/xp_median_preimage.py b/gklearn/preimage/experiments/xp_median_preimage.py
index 2b920e7..f3206bc 100644
--- a/gklearn/preimage/experiments/xp_median_preimage.py
+++ b/gklearn/preimage/experiments/xp_median_preimage.py
@@ -53,7 +53,7 @@ def xp_median_preimage_9_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} #
     edge_required = False #
 
@@ -69,7 +69,7 @@ def xp_median_preimage_9_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -114,7 +114,7 @@ def xp_median_preimage_9_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} #
     edge_required = False #
 
@@ -130,7 +130,68 @@ def xp_median_preimage_9_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
+        print('\n-------------------------------------')
+        print('fit method:', fit_method, '\n')
+        mpg_options['fit_method'] = fit_method
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
+
+
+def xp_median_preimage_9_3():
+    """xp 9_3: MAO, Treelet, using CONSTANT.
+    """
+    from gklearn.utils.kernels import polynomialkernel
+    # set parameters.
+    ds_name = 'MAO' #
+    mpg_options = {'fit_method': 'k-graphs',
+                   'init_ecc': [4, 4, 2, 1, 1, 1], #
+                   'ds_name': ds_name,
+                   'parallel': True, # False
+                   'time_limit_in_sec': 0,
+                   'max_itrs': 100, #
+                   'max_itrs_without_update': 3,
+                   'epsilon_residual': 0.01,
+                   'epsilon_ec': 0.1,
+                   'verbose': 2}
+    pkernel = functools.partial(polynomialkernel, d=4, c=1e+7)
+    kernel_options = {'name': 'Treelet', #
+                      'sub_kernel': pkernel,
+                      'parallel': 'imap_unordered',
+                      # 'parallel': None,
+                      'n_jobs': multiprocessing.cpu_count(),
+                      'normalize': True,
+                      'verbose': 2}
+    ged_options = {'method': 'IPFP',
+                   'initialization_method': 'RANDOM', # 'NODE'
+                   'initial_solutions': 10, # 1
+                   'edit_cost': 'CONSTANT', #
+                   'attr_distance': 'euclidean',
+                   'ratio_runs_from_initial_solutions': 1,
+                   'threads': multiprocessing.cpu_count(),
+                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
+    mge_options = {'init_type': 'MEDOID',
+                   'random_inits': 10,
+                   'time_limit': 600,
+                   'verbose': 2,
+                   'refine': False}
+    save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
+    irrelevant_labels = None #
+    edge_required = False #
+
+    # print settings.
+    print('parameters:')
+    print('dataset name:', ds_name)
+    print('mpg_options:', mpg_options)
+    print('kernel_options:', kernel_options)
+    print('ged_options:', ged_options)
+    print('mge_options:', mge_options)
+    print('save_results:', save_results)
+    print('irrelevant_labels:', irrelevant_labels)
+    print()
+
+    # generate preimages.
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
         generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
@@ -178,7 +239,7 @@ def xp_median_preimage_8_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -194,7 +255,7 @@ def xp_median_preimage_8_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -239,7 +300,68 @@ def xp_median_preimage_8_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
+    irrelevant_labels = None #
+    edge_required = False #
+
+    # print settings.
+    print('parameters:')
+    print('dataset name:', ds_name)
+    print('mpg_options:', mpg_options)
+    print('kernel_options:', kernel_options)
+    print('ged_options:', ged_options)
+    print('mge_options:', mge_options)
+    print('save_results:', save_results)
+    print('irrelevant_labels:', irrelevant_labels)
+    print()
+
+    # generate preimages.
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
+        print('\n-------------------------------------')
+        print('fit method:', fit_method, '\n')
+        mpg_options['fit_method'] = fit_method
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
+
+
+def xp_median_preimage_8_3():
+    """xp 8_3: Monoterpenoides, Treelet, using CONSTANT.
+    """
+    from gklearn.utils.kernels import polynomialkernel
+    # set parameters.
+    ds_name = 'Monoterpenoides' #
+    mpg_options = {'fit_method': 'k-graphs',
+                   'init_ecc': [4, 4, 2, 1, 1, 1], #
+                   'ds_name': ds_name,
+                   'parallel': True, # False
+                   'time_limit_in_sec': 0,
+                   'max_itrs': 100, #
+                   'max_itrs_without_update': 3,
+                   'epsilon_residual': 0.01,
+                   'epsilon_ec': 0.1,
+                   'verbose': 2}
+    pkernel = functools.partial(polynomialkernel, d=2, c=1e+5)
+    kernel_options = {'name': 'Treelet',
+                      'sub_kernel': pkernel,
+                      'parallel': 'imap_unordered',
+                      # 'parallel': None,
+                      'n_jobs': multiprocessing.cpu_count(),
+                      'normalize': True,
+                      'verbose': 2}
+    ged_options = {'method': 'IPFP',
+                   'initialization_method': 'RANDOM', # 'NODE'
+                   'initial_solutions': 10, # 1
+                   'edit_cost': 'CONSTANT', #
+                   'attr_distance': 'euclidean',
+                   'ratio_runs_from_initial_solutions': 1,
+                   'threads': multiprocessing.cpu_count(),
+                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
+    mge_options = {'init_type': 'MEDOID',
+                   'random_inits': 10,
+                   'time_limit': 600,
+                   'verbose': 2,
+                   'refine': False}
+    save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -255,7 +377,7 @@ def xp_median_preimage_8_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -303,7 +425,7 @@ def xp_median_preimage_7_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -319,7 +441,7 @@ def xp_median_preimage_7_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -364,7 +486,7 @@ def xp_median_preimage_7_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -380,7 +502,68 @@ def xp_median_preimage_7_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
+        print('\n-------------------------------------')
+        print('fit method:', fit_method, '\n')
+        mpg_options['fit_method'] = fit_method
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
+
+
+def xp_median_preimage_7_3():
+    """xp 7_3: MUTAG, Treelet, using CONSTANT.
+    """
+    from gklearn.utils.kernels import polynomialkernel
+    # set parameters.
+    ds_name = 'MUTAG' #
+    mpg_options = {'fit_method': 'k-graphs',
+                   'init_ecc': [4, 4, 2, 1, 1, 1], #
+                   'ds_name': ds_name,
+                   'parallel': True, # False
+                   'time_limit_in_sec': 0,
+                   'max_itrs': 100, #
+                   'max_itrs_without_update': 3,
+                   'epsilon_residual': 0.01,
+                   'epsilon_ec': 0.1,
+                   'verbose': 2}
+    pkernel = functools.partial(polynomialkernel, d=3, c=1e+8)
+    kernel_options = {'name': 'Treelet',
+                      'sub_kernel': pkernel,
+                      'parallel': 'imap_unordered',
+                      # 'parallel': None,
+                      'n_jobs': multiprocessing.cpu_count(),
+                      'normalize': True,
+                      'verbose': 2}
+    ged_options = {'method': 'IPFP',
+                   'initialization_method': 'RANDOM', # 'NODE'
+                   'initial_solutions': 10, # 1
+                   'edit_cost': 'CONSTANT', #
+                   'attr_distance': 'euclidean',
+                   'ratio_runs_from_initial_solutions': 1,
+                   'threads': multiprocessing.cpu_count(),
+                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
+    mge_options = {'init_type': 'MEDOID',
+                   'random_inits': 10,
+                   'time_limit': 600,
+                   'verbose': 2,
+                   'refine': False}
+    save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
+    irrelevant_labels = None #
+    edge_required = False #
+
+    # print settings.
+    print('parameters:')
+    print('dataset name:', ds_name)
+    print('mpg_options:', mpg_options)
+    print('kernel_options:', kernel_options)
+    print('ged_options:', ged_options)
+    print('mge_options:', mge_options)
+    print('save_results:', save_results)
+    print('irrelevant_labels:', irrelevant_labels)
+    print()
+
+    # generate preimages.
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -428,7 +611,7 @@ def xp_median_preimage_6_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -444,7 +627,7 @@ def xp_median_preimage_6_1():
     print()
 
    # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -490,7 +673,7 @@ def xp_median_preimage_6_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = True #
 
@@ -506,7 +689,7 @@ def xp_median_preimage_6_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -554,7 +737,7 @@ def xp_median_preimage_5_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -570,7 +753,7 @@ def xp_median_preimage_5_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -618,7 +801,7 @@ def xp_median_preimage_4_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -634,7 +817,7 @@ def xp_median_preimage_4_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -680,7 +863,7 @@ def xp_median_preimage_3_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = {'edge_attrs': ['orient', 'angle']} #
     edge_required = True #
 
@@ -696,7 +879,7 @@ def xp_median_preimage_3_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -744,7 +927,7 @@ def xp_median_preimage_3_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = {'edge_attrs': ['orient', 'angle']} #
     edge_required = False #
 
@@ -760,7 +943,7 @@ def xp_median_preimage_3_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -808,7 +991,7 @@ def xp_median_preimage_2_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = {'edge_labels': ['valence']}
 
     # print settings.
@@ -827,7 +1010,7 @@
     # compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save=dir_save, irrelevant_labels=irrelevant_labels)
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -875,6 +1058,7 @@ def xp_median_preimage_1_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
 
     # print settings.
     print('parameters:')
@@ -886,11 +1070,11 @@
     print('save_results:', save_results)
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
-        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save='../results/xp_median_preimage/')
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save)
 
 
 def xp_median_preimage_1_2():
@@ -932,7 +1116,7 @@ def xp_median_preimage_1_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = True #
 
@@ -948,7 +1132,7 @@ def xp_median_preimage_1_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -996,6 +1180,7 @@ def xp_median_preimage_10_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
 
     # print settings.
     print('parameters:')
@@ -1007,11 +1192,11 @@
     print('save_results:', save_results)
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
-        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save='../results/xp_median_preimage/')
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save)
 
 
 def xp_median_preimage_10_2():
@@ -1053,7 +1238,7 @@ def xp_median_preimage_10_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = True #
 
@@ -1069,7 +1254,7 @@ def xp_median_preimage_10_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -1117,6 +1302,7 @@ def xp_median_preimage_11_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
 
     # print settings.
     print('parameters:')
@@ -1128,11 +1314,11 @@
     print('save_results:', save_results)
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
-        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save='../results/xp_median_preimage/')
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save)
 
 
 def xp_median_preimage_11_2():
@@ -1174,7 +1360,7 @@ def xp_median_preimage_11_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = True #
 
@@ -1190,7 +1376,7 @@ def xp_median_preimage_11_2():
     print()
 
    # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -1242,16 +1428,25 @@ if __name__ == "__main__":
 #    xp_median_preimage_7_1()
 
     #### xp 7_2: MUTAG, PathUpToH, using CONSTANT.
-    xp_median_preimage_7_2()
+    # xp_median_preimage_7_2()
+
+    #### xp 7_3: MUTAG, Treelet, using CONSTANT.
+    # xp_median_preimage_7_3()
 
     #### xp 8_1: Monoterpenoides, StructuralSP, using CONSTANT.
     # xp_median_preimage_8_1()
 
     #### xp 8_2: Monoterpenoides, PathUpToH, using CONSTANT.
-#    xp_median_preimage_8_2()
+    # xp_median_preimage_8_2()
+
+    #### xp 8_3: Monoterpenoides, Treelet, using CONSTANT.
+#    xp_median_preimage_8_3()
 
     #### xp 9_1: MAO, StructuralSP, using CONSTANT, symbolic only.
     # xp_median_preimage_9_1()
 
     #### xp 9_2: MAO, PathUpToH, using CONSTANT, symbolic only.
-#    xp_median_preimage_9_2()
\ No newline at end of file
+    # xp_median_preimage_9_2()
+
+    #### xp 9_3: MAO, Treelet, using CONSTANT.
+    xp_median_preimage_9_3()
\ No newline at end of file
diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py
index 04efa9d..5f00641 100644
--- a/gklearn/preimage/median_preimage_generator.py
+++ b/gklearn/preimage/median_preimage_generator.py
@@ -745,8 +745,14 @@ class MedianPreimageGenerator(PreimageGenerator):
                                          edge_labels=self._dataset.edge_labels,
                                          ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
                                          **self._kernel_options)
+        elif self._kernel_options['name'] == 'Treelet':
+            from gklearn.kernels import Treelet
+            self._graph_kernel = Treelet(node_labels=self._dataset.node_labels,
+                                         edge_labels=self._dataset.edge_labels,
+                                         ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
+                                         **self._kernel_options)
         else:
-            raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH".')
+            raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet".')
 
 
     # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
diff --git a/gklearn/preimage/utils.py b/gklearn/preimage/utils.py
index 63c8b9e..823ecc5 100644
--- a/gklearn/preimage/utils.py
+++ b/gklearn/preimage/utils.py
@@ -22,6 +22,7 @@ from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
 from gklearn.utils import Dataset
 import csv
 import networkx as nx
+import os
 
 
 def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False):
@@ -215,6 +216,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
 
     # save median graphs.
     if save_medians:
+        if not os.path.exists(dir_save + 'medians/'):
+            os.makedirs(dir_save + 'medians/')
         print('Saving median graphs to files...')
         fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
         saveGXL(mpg.set_median, fn_pre_sm + '.gxl', method='default',
@@ -286,6 +289,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
 
 
 def __init_output_file(ds_name, gkernel, fit_method, dir_output):
+    if not os.path.exists(dir_output):
+        os.makedirs(dir_output)
     # fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
     fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
     f_detail = open(dir_output + fn_output_detail, 'a')
diff --git a/gklearn/tests/test_graph_kernels.py b/gklearn/tests/test_graph_kernels.py
index 5ee9997..a92ebe9 100644
--- a/gklearn/tests/test_graph_kernels.py
+++ b/gklearn/tests/test_graph_kernels.py
@@ -231,28 +231,31 @@ def test_PathUpToH(ds_name, parallel, k_func, compute_method):
         assert False, exception
 
 
-# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
-# @pytest.mark.parametrize('parallel', ['imap_unordered', None])
-# def test_treeletkernel(ds_name, parallel):
-#     """Test treelet kernel.
-#     """
-#     from gklearn.kernels.treeletKernel import treeletkernel
-#     from gklearn.utils.kernels import polynomialkernel
-#     import functools
+@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
+@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_treeletkernel(ds_name, parallel):
+    """Test treelet kernel.
+ """ + from gklearn.kernels import Treelet + from gklearn.utils.kernels import polynomialkernel + import functools -# Gn, y = chooseDataset(ds_name) + dataset = chooseDataset(ds_name) -# pkernel = functools.partial(polynomialkernel, d=2, c=1e5) -# try: -# Kmatrix, run_time = treeletkernel(Gn, -# sub_kernel=pkernel, -# node_label='atom', -# edge_label='bond_type', -# parallel=parallel, -# n_jobs=multiprocessing.cpu_count(), -# verbose=True) -# except Exception as exception: -# assert False, exception + pkernel = functools.partial(polynomialkernel, d=2, c=1e5) + try: + graph_kernel = Treelet(node_labels=dataset.node_labels, + edge_labels=dataset.edge_labels, + ds_infos=dataset.get_dataset_infos(keys=['directed']), + sub_kernel=pkernel) + gram_matrix, run_time = graph_kernel.compute(dataset.graphs, + parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], + parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], + parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + except Exception as exception: + assert False, exception # @pytest.mark.parametrize('ds_name', ['Acyclic']) diff --git a/gklearn/utils/utils.py b/gklearn/utils/utils.py index 0dca111..17954f5 100644 --- a/gklearn/utils/utils.py +++ b/gklearn/utils/utils.py @@ -351,4 +351,77 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d if save_results: np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list) - print('\ncomplete.') \ No newline at end of file + print('\ncomplete.') + + +def find_paths(G, source_node, length): + """Find all paths with a certain length those start from a source node. + A recursive depth first search is applied. + + Parameters + ---------- + G : NetworkX graphs + The graph in which paths are searched. + source_node : integer + The number of the node from where all paths start. + length : integer + The length of paths. + + Return + ------ + path : list of list + List of paths retrieved, where each path is represented by a list of nodes. + """ + if length == 0: + return [[source_node]] + path = [[source_node] + path for neighbor in G[source_node] \ + for path in find_paths(G, neighbor, length - 1) if source_node not in path] + return path + + +def find_all_paths(G, length, is_directed): + """Find all paths with a certain length in a graph. A recursive depth first + search is applied. + + Parameters + ---------- + G : NetworkX graphs + The graph in which paths are searched. + length : integer + The length of paths. + + Return + ------ + path : list of list + List of paths retrieved, where each path is represented by a list of nodes. + """ + all_paths = [] + for node in G: + all_paths.extend(find_paths(G, node, length)) + + if not is_directed: + # For each path, two presentations are retrieved from its two extremities. + # Remove one of them. 
+        all_paths_r = [path[::-1] for path in all_paths]
+        for idx, path in enumerate(all_paths[:-1]):
+            for path2 in all_paths_r[idx+1:]:
+                if path == path2:
+                    all_paths[idx] = []
+                    break
+        all_paths = list(filter(lambda a: a != [], all_paths))
+
+    return all_paths
+
+
+def get_mlti_dim_node_attrs(G, attr_names):
+    attributes = []
+    for nd, attrs in G.nodes(data=True):
+        attributes.append(tuple(attrs[aname] for aname in attr_names))
+    return attributes
+
+
+def get_mlti_dim_edge_attrs(G, attr_names):
+    attributes = []
+    for ed, attrs in G.edges(data=True):
+        attributes.append(tuple(attrs[aname] for aname in attr_names))
+    return attributes
\ No newline at end of file
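
For reference, a minimal usage sketch of the Treelet kernel class added by this patch, assembled from the updated test in gklearn/tests/test_graph_kernels.py. It assumes a gklearn Dataset instance named `dataset` has already been loaded; everything else is taken from the patch itself.

    import functools
    import multiprocessing
    from gklearn.kernels import Treelet
    from gklearn.utils.kernels import polynomialkernel

    # polynomial sub-kernel, as used by the new test and the xp_median_preimage experiments
    pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
    graph_kernel = Treelet(node_labels=dataset.node_labels,
                           edge_labels=dataset.edge_labels,
                           ds_infos=dataset.get_dataset_infos(keys=['directed']),
                           sub_kernel=pkernel)

    # Gram matrix over all graphs, kernel list against one graph, and a single kernel value.
    gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
        parallel='imap_unordered', n_jobs=multiprocessing.cpu_count(), verbose=True)
    kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
        parallel='imap_unordered', n_jobs=multiprocessing.cpu_count(), verbose=True)
    kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
        parallel='imap_unordered', n_jobs=multiprocessing.cpu_count(), verbose=True)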
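
Each kernel value reduces to a sub-kernel applied to treelet-count vectors restricted to the canonical keys shared by both graphs, as in Treelet.__kernel_do above. A self-contained sketch of just that step; the key strings and counts below are invented for illustration, and `polynomial` is a stand-in sub-kernel, not necessarily the exact form of gklearn's polynomialkernel:

    import numpy as np

    def polynomial(x, y, d=2, c=1.0):
        # a simple polynomial sub-kernel for illustration: (<x, y> + c) ** d
        return (np.dot(x, y) + c) ** d

    # hypothetical canonical-key -> count dictionaries for two graphs,
    # of the kind Treelet.__get_canonkeys returns for unlabeled graphs
    canonkey1 = {'0': 5, '1': 4, '6': 1}
    canonkey2 = {'0': 4, '1': 3, '7': 2}

    keys = set(canonkey1) & set(canonkey2)  # treelets present in both graphs
    vector1 = np.array([canonkey1[k] for k in keys])
    vector2 = np.array([canonkey2[k] for k in keys])
    print(polynomial(vector1, vector2))     # the treelet kernel value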
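
The find_paths/find_all_paths helpers added to gklearn/utils/utils.py enumerate paths by recursive depth-first search; for undirected graphs, one of the two reversed representations of each path is dropped. A quick illustrative check on a 4-node path graph, with the expected outputs worked out by hand from the code above:

    import networkx as nx
    from gklearn.utils.utils import find_all_paths, find_paths

    G = nx.path_graph(4)                # 0 - 1 - 2 - 3
    print(find_paths(G, 0, 2))          # [[0, 1, 2]]
    print(find_all_paths(G, 2, False))  # [[2, 1, 0], [3, 2, 1]]: one representative
                                        # kept per undirected path of length 2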