@@ -11,3 +11,4 @@ from gklearn.kernels.graph_kernel import GraphKernel
 from gklearn.kernels.structural_sp import StructuralSP
 from gklearn.kernels.shortest_path import ShortestPath
 from gklearn.kernels.path_up_to_h import PathUpToH
+from gklearn.kernels.treelet import Treelet
@@ -176,7 +176,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
         pool.close()
         pool.join()
 
-        # compute Gram matrix.
+        # compute kernel list.
         kernel_list = [None] * len(g_list)
 
         def init_worker(p1_toshare, plist_toshare):
@@ -0,0 +1,505 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr 13 18:02:46 2020
+
+@author: ljia
+
+@references:
+
+    [1] Gaüzère B, Brun L, Villemin D. Two new graphs kernels in
+    chemoinformatics. Pattern Recognition Letters. 2012 Nov 1;33(15):2038-47.
+"""
+
+import sys
+from multiprocessing import Pool
+from tqdm import tqdm
+import numpy as np
+import networkx as nx
+from collections import Counter
+from itertools import chain
+from gklearn.utils.parallel import parallel_gm, parallel_me
+from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs
+from gklearn.kernels import GraphKernel
+
+
+class Treelet(GraphKernel):
+
+    def __init__(self, **kwargs):
+        GraphKernel.__init__(self)
+        self.__node_labels = kwargs.get('node_labels', [])
+        self.__edge_labels = kwargs.get('edge_labels', [])
+        self.__sub_kernel = kwargs.get('sub_kernel', None)
+        self.__ds_infos = kwargs.get('ds_infos', {})
+        if self.__sub_kernel is None:
+            raise Exception('Sub kernel not set.')
+
+    def _compute_gm_series(self):
+        self.__add_dummy_labels(self._graphs)
+
+        # Get the canonical keys of all graphs before computing kernels to
+        # save time; this may cost a lot of memory for large datasets.
+        canonkeys = []
+        if self._verbose >= 2:
+            iterator = tqdm(self._graphs, desc='getting canonkeys', file=sys.stdout)
+        else:
+            iterator = self._graphs
+        for g in iterator:
+            canonkeys.append(self.__get_canonkeys(g))
+
+        # compute Gram matrix.
+        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+        from itertools import combinations_with_replacement
+        itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
+        if self._verbose >= 2:
+            iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
+        else:
+            iterator = itr
+        for i, j in iterator:
+            kernel = self.__kernel_do(canonkeys[i], canonkeys[j])
+            gram_matrix[i][j] = kernel
+            gram_matrix[j][i] = kernel  # @todo: no directed graph considered?
+
+        return gram_matrix
+
+    def _compute_gm_imap_unordered(self):
+        self.__add_dummy_labels(self._graphs)
+
+        # Get the canonical keys of all graphs before computing kernels to
+        # save time; this may cost a lot of memory for large datasets.
+        pool = Pool(self._n_jobs)
+        itr = zip(self._graphs, range(0, len(self._graphs)))
+        if len(self._graphs) < 100 * self._n_jobs:
+            chunksize = int(len(self._graphs) / self._n_jobs) + 1
+        else:
+            chunksize = 100
+        canonkeys = [[] for _ in range(len(self._graphs))]
+        get_fun = self._wrapper_get_canonkeys
+        if self._verbose >= 2:
+            iterator = tqdm(pool.imap_unordered(get_fun, itr, chunksize),
+                            desc='getting canonkeys', file=sys.stdout)
+        else:
+            iterator = pool.imap_unordered(get_fun, itr, chunksize)
+        for i, ck in iterator:
+            canonkeys[i] = ck
+        pool.close()
+        pool.join()
+
+        # compute Gram matrix.
+        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+        def init_worker(canonkeys_toshare):
+            global G_canonkeys
+            G_canonkeys = canonkeys_toshare
+        do_fun = self._wrapper_kernel_do
+        parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
+                    glbv=(canonkeys,), n_jobs=self._n_jobs, verbose=self._verbose)
+
+        return gram_matrix
+
+    def _compute_kernel_list_series(self, g1, g_list):
+        self.__add_dummy_labels(g_list + [g1])
+
+        # Get the canonical keys of all graphs before computing kernels to
+        # save time; this may cost a lot of memory for large datasets.
+        canonkeys_1 = self.__get_canonkeys(g1)
+        canonkeys_list = []
+        if self._verbose >= 2:
+            iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout)
+        else:
+            iterator = g_list
+        for g in iterator:
+            canonkeys_list.append(self.__get_canonkeys(g))
+
+        # compute kernel list.
+        kernel_list = [None] * len(g_list)
+        if self._verbose >= 2:
+            iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
+        else:
+            iterator = range(len(g_list))
+        for i in iterator:
+            kernel = self.__kernel_do(canonkeys_1, canonkeys_list[i])
+            kernel_list[i] = kernel
+
+        return kernel_list
+
+    def _compute_kernel_list_imap_unordered(self, g1, g_list):
+        self.__add_dummy_labels(g_list + [g1])
+
+        # Get the canonical keys of all graphs before computing kernels to
+        # save time; this may cost a lot of memory for large datasets.
+        canonkeys_1 = self.__get_canonkeys(g1)
+        canonkeys_list = [[] for _ in range(len(g_list))]
+        pool = Pool(self._n_jobs)
+        itr = zip(g_list, range(0, len(g_list)))
+        if len(g_list) < 100 * self._n_jobs:
+            chunksize = int(len(g_list) / self._n_jobs) + 1
+        else:
+            chunksize = 100
+        get_fun = self._wrapper_get_canonkeys
+        if self._verbose >= 2:
+            iterator = tqdm(pool.imap_unordered(get_fun, itr, chunksize),
+                            desc='getting canonkeys', file=sys.stdout)
+        else:
+            iterator = pool.imap_unordered(get_fun, itr, chunksize)
+        for i, ck in iterator:
+            canonkeys_list[i] = ck
+        pool.close()
+        pool.join()
+
+        # compute kernel list.
+        kernel_list = [None] * len(g_list)
+
+        def init_worker(ck_1_toshare, ck_list_toshare):
+            global G_ck_1, G_ck_list
+            G_ck_1 = ck_1_toshare
+            G_ck_list = ck_list_toshare
+        do_fun = self._wrapper_kernel_list_do
+
+        def func_assign(result, var_to_assign):
+            var_to_assign[result[0]] = result[1]
+        itr = range(len(g_list))
+        len_itr = len(g_list)
+        parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
+                    init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list),
+                    method='imap_unordered', n_jobs=self._n_jobs,
+                    itr_desc='calculating kernels', verbose=self._verbose)
+
+        return kernel_list
+
+    def _wrapper_kernel_list_do(self, itr):
+        return itr, self.__kernel_do(G_ck_1, G_ck_list[itr])
+
+    def _compute_single_kernel_series(self, g1, g2):
+        self.__add_dummy_labels([g1] + [g2])
+        canonkeys_1 = self.__get_canonkeys(g1)
+        canonkeys_2 = self.__get_canonkeys(g2)
+        kernel = self.__kernel_do(canonkeys_1, canonkeys_2)
+        return kernel
+
+    def __kernel_do(self, canonkey1, canonkey2):
+        """Compute the treelet graph kernel between two graphs.
+
+        Parameters
+        ----------
+        canonkey1, canonkey2 : dict
+            Dictionaries mapping each canonical key (a string or tuple that
+            identifies a treelet) to the number of times the treelet occurs
+            in the graph.
+
+        Return
+        ------
+        kernel : float
+            The treelet kernel between the two graphs.
+        """
+        keys = set(canonkey1.keys()) & set(canonkey2.keys())  # canonical keys present in both graphs
+        vector1 = np.array([canonkey1[key] for key in keys])
+        vector2 = np.array([canonkey2[key] for key in keys])
+        kernel = self.__sub_kernel(vector1, vector2)
+        return kernel
+
+    def _wrapper_kernel_do(self, itr):
+        i = itr[0]
+        j = itr[1]
+        return i, j, self.__kernel_do(G_canonkeys[i], G_canonkeys[j])
+
+    def __get_canonkeys(self, G):
+        """Generate canonical keys of all treelets in a graph.
+
+        Parameters
+        ----------
+        G : NetworkX graph
+            The graph in which keys are generated.
+
+        Return
+        ------
+        canonkey/canonkey_l : dict
+            For unlabeled graphs, canonkey records the number of occurrences
+            of every tree pattern. For labeled graphs, canonkey_l keeps track
+            of the number of occurrences of every treelet.
+        """
+        patterns = {}  # a dictionary mapping each pattern name to its list of occurrences.
+        canonkey = {}  # canonical key, a dictionary recording the number of occurrences of every tree pattern.
+
+        ### structural analysis ###
+        ### In this section, a list of patterns is generated for each treelet,
+        ### where every pattern is represented by a list of nodes ordered by
+        ### Morgan's extended labeling.
+        # linear patterns
+        patterns['0'] = list(G.nodes())
+        canonkey['0'] = nx.number_of_nodes(G)
+        for i in range(1, 6):  # paths with 1 to 5 edges
+            patterns[str(i)] = find_all_paths(G, i, self.__ds_infos['directed'])
+            canonkey[str(i)] = len(patterns[str(i)])
+
+        # n-star patterns
+        patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3]
+        patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4]
+        patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5]
+        # counts of the n-star patterns
+        canonkey['6'] = len(patterns['3star'])
+        canonkey['8'] = len(patterns['4star'])
+        canonkey['d'] = len(patterns['5star'])
+
+        # pattern 7
+        patterns['7'] = []  # the 1st line of Table 1 in Ref [1]
+        for pattern in patterns['3star']:
+            for i in range(1, len(pattern)):  # for each neighbor of node 0
+                if G.degree(pattern[i]) >= 2:
+                    pattern_t = pattern[:]
+                    # set the node with degree >= 2 as the 4th node
+                    pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]
+                    for neighborx in G[pattern[i]]:
+                        if neighborx != pattern[0]:
+                            new_pattern = pattern_t + [neighborx]
+                            patterns['7'].append(new_pattern)
+        canonkey['7'] = len(patterns['7'])
+
+        # pattern 11
+        patterns['11'] = []  # the 4th line of Table 1 in Ref [1]
+        for pattern in patterns['4star']:
+            for i in range(1, len(pattern)):
+                if G.degree(pattern[i]) >= 2:
+                    pattern_t = pattern[:]
+                    pattern_t[i], pattern_t[4] = pattern_t[4], pattern_t[i]
+                    for neighborx in G[pattern[i]]:
+                        if neighborx != pattern[0]:
+                            new_pattern = pattern_t + [neighborx]
+                            patterns['11'].append(new_pattern)
+        canonkey['b'] = len(patterns['11'])
+
+        # pattern 12
+        patterns['12'] = []  # the 5th line of Table 1 in Ref [1]
+        rootlist = []  # a list of root nodes whose extended labels are 3
+        for pattern in patterns['3star']:
+            if pattern[0] not in rootlist:  # avoid counting the same pattern twice, once from each of its two root nodes
+                rootlist.append(pattern[0])
+                for i in range(1, len(pattern)):
+                    if G.degree(pattern[i]) >= 3:
+                        rootlist.append(pattern[i])
+                        pattern_t = pattern[:]
+                        pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]
+                        for neighborx1 in G[pattern[i]]:
+                            if neighborx1 != pattern[0]:
+                                for neighborx2 in G[pattern[i]]:
+                                    if neighborx1 > neighborx2 and neighborx2 != pattern[0]:
+                                        new_pattern = pattern_t + [neighborx1] + [neighborx2]
+                                        patterns['12'].append(new_pattern)
+        canonkey['c'] = int(len(patterns['12']) / 2)
+
+        # pattern 9
+        patterns['9'] = []  # the 2nd line of Table 1 in Ref [1]
+        for pattern in patterns['3star']:
+            for pairs in [[neighbor1, neighbor2] for neighbor1 in G[pattern[0]] if G.degree(neighbor1) >= 2
+                          for neighbor2 in G[pattern[0]] if G.degree(neighbor2) >= 2 if neighbor1 > neighbor2]:
+                pattern_t = pattern[:]
+                # move the nodes with extended label 4 to the positions corresponding to their children
+                pattern_t[pattern_t.index(pairs[0])], pattern_t[2] = pattern_t[2], pattern_t[pattern_t.index(pairs[0])]
+                pattern_t[pattern_t.index(pairs[1])], pattern_t[3] = pattern_t[3], pattern_t[pattern_t.index(pairs[1])]
+                for neighborx1 in G[pairs[0]]:
+                    if neighborx1 != pattern[0]:
+                        for neighborx2 in G[pairs[1]]:
+                            if neighborx2 != pattern[0]:
+                                new_pattern = pattern_t + [neighborx1] + [neighborx2]
+                                patterns['9'].append(new_pattern)
+        canonkey['9'] = len(patterns['9'])
+
+        # pattern 10
+        patterns['10'] = []  # the 3rd line of Table 1 in Ref [1]
+        for pattern in patterns['3star']:
+            for i in range(1, len(pattern)):
+                if G.degree(pattern[i]) >= 2:
+                    for neighborx in G[pattern[i]]:
+                        if neighborx != pattern[0] and G.degree(neighborx) >= 2:
+                            pattern_t = pattern[:]
+                            pattern_t[i], pattern_t[3] = pattern_t[3], pattern_t[i]
+                            new_patterns = [pattern_t + [neighborx] + [neighborxx] for neighborxx in G[neighborx] if neighborxx != pattern[i]]
+                            patterns['10'].extend(new_patterns)
+        canonkey['a'] = len(patterns['10'])
+
+        ### labeling information ###
+        ### In this section, a list of canonical keys is generated for every
+        ### pattern obtained in the structural analysis section above; each
+        ### key is a string corresponding to a unique treelet. A dictionary is
+        ### built to keep track of the number of occurrences of every treelet.
+        if len(self.__node_labels) > 0 or len(self.__edge_labels) > 0:
+            canonkey_l = {}  # canonical key, a dictionary keeping track of the number of occurrences of every treelet.
+
+            # linear patterns
+            canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.__node_labels))
+            for key in canonkey_t:
+                canonkey_l[('0', key)] = canonkey_t[key]
+
+            for i in range(1, 6):  # paths with 1 to 5 edges
+                treelet = []
+                for pattern in patterns[str(i)]:
+                    canonlist = []
+                    for idx, node in enumerate(pattern[:-1]):
+                        canonlist.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
+                        canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.__edge_labels))
+                    canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.__node_labels))
+                    canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1]
+                    treelet.append(tuple([str(i)] + canonkey_t))
+                canonkey_l.update(Counter(treelet))
+
+            # n-star patterns
+            for i in range(3, 6):
+                treelet = []
+                for pattern in patterns[str(i) + 'star']:
+                    canonlist = []
+                    for leaf in pattern[1:]:
+                        nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                        elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                        canonlist.append(tuple((nlabels, elabels)))
+                    canonlist.sort()
+                    canonlist = list(chain.from_iterable(canonlist))
+                    canonkey_t = tuple(['d' if i == 5 else str(i * 2)]
+                                       + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+                                       + canonlist)
+                    treelet.append(canonkey_t)
+                canonkey_l.update(Counter(treelet))
+
+            # pattern 7
+            treelet = []
+            for pattern in patterns['7']:
+                canonlist = []
+                for leaf in pattern[1:3]:
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    canonlist.append(tuple((nlabels, elabels)))
+                canonlist.sort()
+                canonlist = list(chain.from_iterable(canonlist))
+                canonkey_t = tuple(['7']
+                                   + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist
+                                   + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
+                                   + [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+                                   + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
+                                   + [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)])
+                treelet.append(canonkey_t)
+            canonkey_l.update(Counter(treelet))
+
+            # pattern 11
+            treelet = []
+            for pattern in patterns['11']:
+                canonlist = []
+                for leaf in pattern[1:4]:
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    canonlist.append(tuple((nlabels, elabels)))
+                canonlist.sort()
+                canonlist = list(chain.from_iterable(canonlist))
+                canonkey_t = tuple(['b']
+                                   + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist
+                                   + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
+                                   + [tuple(G[pattern[4]][pattern[0]][el] for el in self.__edge_labels)]
+                                   + [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels)]
+                                   + [tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)])
+                treelet.append(canonkey_t)
+            canonkey_l.update(Counter(treelet))
+
+            # pattern 10
+            treelet = []
+            for pattern in patterns['10']:
+                canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
+                             tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)]
+                canonlist = []
+                for leaf in pattern[1:3]:
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    canonlist.append(tuple((nlabels, elabels)))
+                canonlist.sort()
+                canonkey0 = list(chain.from_iterable(canonlist))
+                canonkey_t = tuple(['a']
+                                   + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
+                                   + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
+                                   + [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)]
+                                   + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+                                   + [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)]
+                                   + canonkey4 + canonkey0)
+                treelet.append(canonkey_t)
+            canonkey_l.update(Counter(treelet))
+
+            # pattern 12
+            treelet = []
+            for pattern in patterns['12']:
+                canonlist0 = []
+                for leaf in pattern[1:3]:
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    canonlist0.append(tuple((nlabels, elabels)))
+                canonlist0.sort()
+                canonlist0 = list(chain.from_iterable(canonlist0))
+                canonlist3 = []
+                for leaf in pattern[4:6]:
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
+                    elabels = tuple(G[leaf][pattern[3]][el] for el in self.__edge_labels)
+                    canonlist3.append(tuple((nlabels, elabels)))
+                canonlist3.sort()
+                canonlist3 = list(chain.from_iterable(canonlist3))
+
+                # Two possible keys can be generated from the two nodes with
+                # extended label 3; select the one with the lower
+                # lexicographic order.
+                canonkey_t1 = tuple(['c']
+                                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist0
+                                    + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
+                                    + [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+                                    + canonlist3)
+                canonkey_t2 = tuple(['c']
+                                    + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] + canonlist3
+                                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+                                    + [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)]
+                                    + canonlist0)
+                treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2)
+            canonkey_l.update(Counter(treelet))
+
+            # pattern 9
+            treelet = []
+            for pattern in patterns['9']:
+                canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels),
+                             tuple(G[pattern[4]][pattern[2]][el] for el in self.__edge_labels)]
+                canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
+                             tuple(G[pattern[5]][pattern[3]][el] for el in self.__edge_labels)]
+                prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.__node_labels),
+                           tuple(G[pattern[2]][pattern[0]][el] for el in self.__edge_labels)]
+                prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels),
+                           tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+                if prekey2 + canonkey2 < prekey3 + canonkey3:
+                    canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
+                                 + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
+                                 + prekey2 + prekey3 + canonkey2 + canonkey3
+                else:
+                    canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
+                                 + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
+                                 + prekey3 + prekey2 + canonkey3 + canonkey2
+                treelet.append(tuple(['9']
+                                     + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+                                     + canonkey_t))
+            canonkey_l.update(Counter(treelet))
+
+            return canonkey_l
+
+        return canonkey
+
+    def _wrapper_get_canonkeys(self, itr_item):
+        g = itr_item[0]
+        i = itr_item[1]
+        return i, self.__get_canonkeys(g)
+
+    def __add_dummy_labels(self, Gn):
+        if len(self.__node_labels) == 0:
+            for G in Gn:
+                nx.set_node_attributes(G, '0', 'dummy')
+            self.__node_labels.append('dummy')
+        if len(self.__edge_labels) == 0:
+            for G in Gn:
+                nx.set_edge_attributes(G, '0', 'dummy')
+            self.__edge_labels.append('dummy')
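
Editor's note: below is a minimal usage sketch of the new Treelet class, not part of the diff. It mirrors the API exercised by the revived test near the end of this patch; the toy graphs and the label names 'atom' and 'bond_type' are illustrative assumptions.

    import functools
    import multiprocessing
    import networkx as nx
    from gklearn.kernels import Treelet
    from gklearn.utils.kernels import polynomialkernel

    # Two toy labeled graphs (hypothetical labels).
    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
    g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
    g2.add_edge(0, 1, bond_type='1')

    # The sub-kernel compares treelet count vectors; d and c are arbitrary here.
    pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
    treelet = Treelet(node_labels=['atom'], edge_labels=['bond_type'],
                      ds_infos={'directed': False}, sub_kernel=pkernel)
    kernel, run_time = treelet.compute(g1, g2, parallel=None,
                                       n_jobs=multiprocessing.cpu_count(),
                                       verbose=False)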
@@ -53,7 +53,7 @@ def xp_median_preimage_9_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} #
     edge_required = False #
 
@@ -69,7 +69,7 @@ def xp_median_preimage_9_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -114,7 +114,7 @@ def xp_median_preimage_9_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} #
     edge_required = False #
 
@@ -130,7 +130,68 @@ def xp_median_preimage_9_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
+        print('\n-------------------------------------')
+        print('fit method:', fit_method, '\n')
+        mpg_options['fit_method'] = fit_method
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
+
+
+def xp_median_preimage_9_3():
+    """xp 9_3: MAO, Treelet, using CONSTANT.
+    """
+    from gklearn.utils.kernels import polynomialkernel
+    # set parameters.
+    ds_name = 'MAO' #
+    mpg_options = {'fit_method': 'k-graphs',
+                   'init_ecc': [4, 4, 2, 1, 1, 1], #
+                   'ds_name': ds_name,
+                   'parallel': True, # False
+                   'time_limit_in_sec': 0,
+                   'max_itrs': 100, #
+                   'max_itrs_without_update': 3,
+                   'epsilon_residual': 0.01,
+                   'epsilon_ec': 0.1,
+                   'verbose': 2}
+    pkernel = functools.partial(polynomialkernel, d=4, c=1e+7)
+    kernel_options = {'name': 'Treelet', #
+                      'sub_kernel': pkernel,
+                      'parallel': 'imap_unordered',
+                      # 'parallel': None,
+                      'n_jobs': multiprocessing.cpu_count(),
+                      'normalize': True,
+                      'verbose': 2}
+    ged_options = {'method': 'IPFP',
+                   'initialization_method': 'RANDOM', # 'NODE'
+                   'initial_solutions': 10, # 1
+                   'edit_cost': 'CONSTANT', #
+                   'attr_distance': 'euclidean',
+                   'ratio_runs_from_initial_solutions': 1,
+                   'threads': multiprocessing.cpu_count(),
+                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
+    mge_options = {'init_type': 'MEDOID',
+                   'random_inits': 10,
+                   'time_limit': 600,
+                   'verbose': 2,
+                   'refine': False}
+    save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
+    irrelevant_labels = None #
+    edge_required = False #
+
+    # print settings.
+    print('parameters:')
+    print('dataset name:', ds_name)
+    print('mpg_options:', mpg_options)
+    print('kernel_options:', kernel_options)
+    print('ged_options:', ged_options)
+    print('mge_options:', mge_options)
+    print('save_results:', save_results)
+    print('irrelevant_labels:', irrelevant_labels)
+    print()
+
+    # generate preimages.
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -178,7 +239,7 @@ def xp_median_preimage_8_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -194,7 +255,7 @@ def xp_median_preimage_8_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -239,7 +300,68 @@ def xp_median_preimage_8_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
+    irrelevant_labels = None #
+    edge_required = False #
+
+    # print settings.
+    print('parameters:')
+    print('dataset name:', ds_name)
+    print('mpg_options:', mpg_options)
+    print('kernel_options:', kernel_options)
+    print('ged_options:', ged_options)
+    print('mge_options:', mge_options)
+    print('save_results:', save_results)
+    print('irrelevant_labels:', irrelevant_labels)
+    print()
+
+    # generate preimages.
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
+        print('\n-------------------------------------')
+        print('fit method:', fit_method, '\n')
+        mpg_options['fit_method'] = fit_method
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
+
+
+def xp_median_preimage_8_3():
+    """xp 8_3: Monoterpenoides, Treelet, using CONSTANT.
+    """
+    from gklearn.utils.kernels import polynomialkernel
+    # set parameters.
+    ds_name = 'Monoterpenoides' #
+    mpg_options = {'fit_method': 'k-graphs',
+                   'init_ecc': [4, 4, 2, 1, 1, 1], #
+                   'ds_name': ds_name,
+                   'parallel': True, # False
+                   'time_limit_in_sec': 0,
+                   'max_itrs': 100, #
+                   'max_itrs_without_update': 3,
+                   'epsilon_residual': 0.01,
+                   'epsilon_ec': 0.1,
+                   'verbose': 2}
+    pkernel = functools.partial(polynomialkernel, d=2, c=1e+5)
+    kernel_options = {'name': 'Treelet',
+                      'sub_kernel': pkernel,
+                      'parallel': 'imap_unordered',
+                      # 'parallel': None,
+                      'n_jobs': multiprocessing.cpu_count(),
+                      'normalize': True,
+                      'verbose': 2}
+    ged_options = {'method': 'IPFP',
+                   'initialization_method': 'RANDOM', # 'NODE'
+                   'initial_solutions': 10, # 1
+                   'edit_cost': 'CONSTANT', #
+                   'attr_distance': 'euclidean',
+                   'ratio_runs_from_initial_solutions': 1,
+                   'threads': multiprocessing.cpu_count(),
+                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
+    mge_options = {'init_type': 'MEDOID',
+                   'random_inits': 10,
+                   'time_limit': 600,
+                   'verbose': 2,
+                   'refine': False}
+    save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -255,7 +377,7 @@ def xp_median_preimage_8_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -303,7 +425,7 @@ def xp_median_preimage_7_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -319,7 +441,7 @@ def xp_median_preimage_7_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -364,7 +486,7 @@ def xp_median_preimage_7_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -380,7 +502,68 @@ def xp_median_preimage_7_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
+        print('\n-------------------------------------')
+        print('fit method:', fit_method, '\n')
+        mpg_options['fit_method'] = fit_method
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
+
+
+def xp_median_preimage_7_3():
+    """xp 7_3: MUTAG, Treelet, using CONSTANT.
+    """
+    from gklearn.utils.kernels import polynomialkernel
+    # set parameters.
+    ds_name = 'MUTAG' #
+    mpg_options = {'fit_method': 'k-graphs',
+                   'init_ecc': [4, 4, 2, 1, 1, 1], #
+                   'ds_name': ds_name,
+                   'parallel': True, # False
+                   'time_limit_in_sec': 0,
+                   'max_itrs': 100, #
+                   'max_itrs_without_update': 3,
+                   'epsilon_residual': 0.01,
+                   'epsilon_ec': 0.1,
+                   'verbose': 2}
+    pkernel = functools.partial(polynomialkernel, d=3, c=1e+8)
+    kernel_options = {'name': 'Treelet',
+                      'sub_kernel': pkernel,
+                      'parallel': 'imap_unordered',
+                      # 'parallel': None,
+                      'n_jobs': multiprocessing.cpu_count(),
+                      'normalize': True,
+                      'verbose': 2}
+    ged_options = {'method': 'IPFP',
+                   'initialization_method': 'RANDOM', # 'NODE'
+                   'initial_solutions': 10, # 1
+                   'edit_cost': 'CONSTANT', #
+                   'attr_distance': 'euclidean',
+                   'ratio_runs_from_initial_solutions': 1,
+                   'threads': multiprocessing.cpu_count(),
+                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
+    mge_options = {'init_type': 'MEDOID',
+                   'random_inits': 10,
+                   'time_limit': 600,
+                   'verbose': 2,
+                   'refine': False}
+    save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
+    irrelevant_labels = None #
+    edge_required = False #
+
+    # print settings.
+    print('parameters:')
+    print('dataset name:', ds_name)
+    print('mpg_options:', mpg_options)
+    print('kernel_options:', kernel_options)
+    print('ged_options:', ged_options)
+    print('mge_options:', mge_options)
+    print('save_results:', save_results)
+    print('irrelevant_labels:', irrelevant_labels)
+    print()
+
+    # generate preimages.
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -428,7 +611,7 @@ def xp_median_preimage_6_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -444,7 +627,7 @@ def xp_median_preimage_6_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -490,7 +673,7 @@ def xp_median_preimage_6_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = True #
 
@@ -506,7 +689,7 @@ def xp_median_preimage_6_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -554,7 +737,7 @@ def xp_median_preimage_5_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -570,7 +753,7 @@ def xp_median_preimage_5_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -618,7 +801,7 @@ def xp_median_preimage_4_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = False #
 
@@ -634,7 +817,7 @@ def xp_median_preimage_4_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -680,7 +863,7 @@ def xp_median_preimage_3_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = {'edge_attrs': ['orient', 'angle']} #
     edge_required = True #
 
@@ -696,7 +879,7 @@ def xp_median_preimage_3_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -744,7 +927,7 @@ def xp_median_preimage_3_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = {'edge_attrs': ['orient', 'angle']} #
     edge_required = False #
 
@@ -760,7 +943,7 @@ def xp_median_preimage_3_1():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -808,7 +991,7 @@ def xp_median_preimage_2_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = {'edge_labels': ['valence']}
 
     # print settings.
@@ -827,7 +1010,7 @@ def xp_median_preimage_2_1():
     # compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save=dir_save, irrelevant_labels=irrelevant_labels)
 
     # generate preimages.
-    for fit_method in ['k-graphs'] + ['random'] * 10:
+    for fit_method in ['k-graphs'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -875,6 +1058,7 @@ def xp_median_preimage_1_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
 
     # print settings.
     print('parameters:')
@@ -886,11 +1070,11 @@ def xp_median_preimage_1_1():
     print('save_results:', save_results)
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
-        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save='../results/xp_median_preimage/')
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save)
 
 
 def xp_median_preimage_1_2():
@@ -932,7 +1116,7 @@ def xp_median_preimage_1_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = True #
 
@@ -948,7 +1132,7 @@ def xp_median_preimage_1_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -996,6 +1180,7 @@ def xp_median_preimage_10_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
 
     # print settings.
     print('parameters:')
@@ -1007,11 +1192,11 @@ def xp_median_preimage_10_1():
     print('save_results:', save_results)
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
-        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save='../results/xp_median_preimage/')
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save)
 
 
 def xp_median_preimage_10_2():
@@ -1053,7 +1238,7 @@ def xp_median_preimage_10_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = True #
 
@@ -1069,7 +1254,7 @@ def xp_median_preimage_10_2():
     print()
 
    # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -1117,6 +1302,7 @@ def xp_median_preimage_11_1():
                    'verbose': 2,
                    'refine': False}
     save_results = True
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
 
     # print settings.
     print('parameters:')
@@ -1128,11 +1314,11 @@ def xp_median_preimage_11_1():
     print('save_results:', save_results)
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
-        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save='../results/xp_median_preimage/')
+        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save)
 
 
 def xp_median_preimage_11_2():
@@ -1174,7 +1360,7 @@ def xp_median_preimage_11_2():
                    'verbose': 2,
                    'refine': False}
     save_results = True
-    dir_save='../results/xp_median_preimage/'
+    dir_save = '../results/xp_median_preimage/' + ds_name + '.' + kernel_options['name'] + '/'
     irrelevant_labels = None #
     edge_required = True #
 
@@ -1190,7 +1376,7 @@ def xp_median_preimage_11_2():
     print()
 
     # generate preimages.
-    for fit_method in ['k-graphs', 'expert'] + ['random'] * 10:
+    for fit_method in ['k-graphs', 'expert'] + ['random'] * 5:
         print('\n-------------------------------------')
         print('fit method:', fit_method, '\n')
         mpg_options['fit_method'] = fit_method
@@ -1242,16 +1428,25 @@ if __name__ == "__main__":
     # xp_median_preimage_7_1()
 
     #### xp 7_2: MUTAG, PathUpToH, using CONSTANT.
-    xp_median_preimage_7_2()
+    # xp_median_preimage_7_2()
+
+    #### xp 7_3: MUTAG, Treelet, using CONSTANT.
+    # xp_median_preimage_7_3()
 
     #### xp 8_1: Monoterpenoides, StructuralSP, using CONSTANT.
     # xp_median_preimage_8_1()
 
     #### xp 8_2: Monoterpenoides, PathUpToH, using CONSTANT.
-    # xp_median_preimage_8_2()
+    # xp_median_preimage_8_2()
+
+    #### xp 8_3: Monoterpenoides, Treelet, using CONSTANT.
+    # xp_median_preimage_8_3()
 
     #### xp 9_1: MAO, StructuralSP, using CONSTANT, symbolic only.
     # xp_median_preimage_9_1()
 
     #### xp 9_2: MAO, PathUpToH, using CONSTANT, symbolic only.
-    # xp_median_preimage_9_2()
+    # xp_median_preimage_9_2()
+
+    #### xp 9_3: MAO, Treelet, using CONSTANT.
+    xp_median_preimage_9_3()
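
Editor's note: every Treelet experiment above hands a polynomial sub-kernel to the kernel via functools.partial. A sketch of the assumed form of gklearn's polynomialkernel, k(x, y) = (<x, y> + c)^d, applied to treelet count vectors:

    import numpy as np

    def polynomial_kernel(x, y, d=1, c=0):
        # dot product of the two count vectors, shifted by c, raised to the power d
        return (np.dot(x, y) + c) ** d

    # e.g. x = [1, 2], y = [3, 4]: (11 + 1) ** 2 = 144
    print(polynomial_kernel(np.array([1, 2]), np.array([3, 4]), d=2, c=1))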
@@ -745,8 +745,14 @@ class MedianPreimageGenerator(PreimageGenerator):
                                                edge_labels=self._dataset.edge_labels,
                                                ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
                                                **self._kernel_options)
+        elif self._kernel_options['name'] == 'Treelet':
+            from gklearn.kernels import Treelet
+            self._graph_kernel = Treelet(node_labels=self._dataset.node_labels,
+                                         edge_labels=self._dataset.edge_labels,
+                                         ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
+                                         **self._kernel_options)
         else:
-            raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH".')
+            raise Exception('The graph kernel given is not defined. Possible choices include: "StructuralSP", "ShortestPath", "PathUpToH", "Treelet".')
 
 
     # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
@@ -22,6 +22,7 @@ from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
 from gklearn.utils import Dataset
 import csv
 import networkx as nx
+import os
 
 
 def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=True, save_medians=True, plot_medians=True, load_gm='auto', dir_save='', irrelevant_labels=None, edge_required=False):
@@ -215,6 +216,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
 
         # save median graphs.
         if save_medians:
+            if not os.path.exists(dir_save + 'medians/'):
+                os.makedirs(dir_save + 'medians/')
             print('Saving median graphs to files...')
             fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options['fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(target) + '.repeat' + str(1)
             saveGXL(mpg.set_median, fn_pre_sm + '.gxl', method='default',
def __init_output_file(ds_name, gkernel, fit_method, dir_output): | def __init_output_file(ds_name, gkernel, fit_method, dir_output): | ||||
if not os.path.exists(dir_output): | |||||
os.makedirs(dir_output) | |||||
# fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | # fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | ||||
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' | fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' | ||||
f_detail = open(dir_output + fn_output_detail, 'a') | f_detail = open(dir_output + fn_output_detail, 'a') | ||||
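
Editor's note: on Python 3.2+ each of the two check-then-create additions above can be collapsed into a single call:

    import os

    os.makedirs(dir_output, exist_ok=True)  # no error if the directory already exists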
@@ -231,28 +231,31 @@ def test_PathUpToH(ds_name, parallel, k_func, compute_method):
         assert False, exception
 
 
-# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
-# @pytest.mark.parametrize('parallel', ['imap_unordered', None])
-# def test_treeletkernel(ds_name, parallel):
-#     """Test treelet kernel.
-#     """
-#     from gklearn.kernels.treeletKernel import treeletkernel
-#     from gklearn.utils.kernels import polynomialkernel
-#     import functools
+@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
+@pytest.mark.parametrize('parallel', ['imap_unordered', None])
+def test_treeletkernel(ds_name, parallel):
+    """Test treelet kernel.
+    """
+    from gklearn.kernels import Treelet
+    from gklearn.utils.kernels import polynomialkernel
+    import functools
 
-#     Gn, y = chooseDataset(ds_name)
+    dataset = chooseDataset(ds_name)
 
-#     pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
-#     try:
-#         Kmatrix, run_time = treeletkernel(Gn,
-#             sub_kernel=pkernel,
-#             node_label='atom',
-#             edge_label='bond_type',
-#             parallel=parallel,
-#             n_jobs=multiprocessing.cpu_count(),
-#             verbose=True)
-#     except Exception as exception:
-#         assert False, exception
+    pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
+    try:
+        graph_kernel = Treelet(node_labels=dataset.node_labels,
+                               edge_labels=dataset.edge_labels,
+                               ds_infos=dataset.get_dataset_infos(keys=['directed']),
+                               sub_kernel=pkernel)
+        gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
+            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+        kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
+            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+        kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
+            parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+    except Exception as exception:
+        assert False, exception
 
 
 # @pytest.mark.parametrize('ds_name', ['Acyclic'])
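
Editor's note: the revived test can be run on its own with pytest, for example via "pytest -k test_treeletkernel" from the test directory (the exact test-file location is not shown in this patch).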
@@ -351,4 +351,77 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d
     if save_results:
         np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=run_time_list)
 
-    print('\ncomplete.')
+    print('\ncomplete.')
+
+
+def find_paths(G, source_node, length):
+    """Find all paths of a given length that start from a source node.
+    A recursive depth-first search is applied.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+        The graph in which paths are searched.
+    source_node : integer
+        The node from which all paths start.
+    length : integer
+        The number of edges in each path.
+
+    Return
+    ------
+    path : list of list
+        List of paths retrieved, where each path is represented by a list of nodes.
+    """
+    if length == 0:
+        return [[source_node]]
+    path = [[source_node] + path for neighbor in G[source_node]
+            for path in find_paths(G, neighbor, length - 1) if source_node not in path]
+    return path
+
+
+def find_all_paths(G, length, is_directed):
+    """Find all paths of a given length in a graph. A recursive depth-first
+    search is applied.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+        The graph in which paths are searched.
+    length : integer
+        The number of edges in each path.
+    is_directed : boolean
+        Whether the graph is directed; if not, each path is kept in only one
+        of its two orientations.
+
+    Return
+    ------
+    path : list of list
+        List of paths retrieved, where each path is represented by a list of nodes.
+    """
+    all_paths = []
+    for node in G:
+        all_paths.extend(find_paths(G, node, length))
+
+    if not is_directed:
+        # For each path, two representations are retrieved, one from each of
+        # its two extremities. Remove one of them.
+        all_paths_r = [path[::-1] for path in all_paths]
+        for idx, path in enumerate(all_paths[:-1]):
+            for path2 in all_paths_r[idx+1::]:
+                if path == path2:
+                    all_paths[idx] = []
+                    break
+        all_paths = list(filter(lambda a: a != [], all_paths))
+
+    return all_paths
+
+
+def get_mlti_dim_node_attrs(G, attr_names):
+    attributes = []
+    for nd, attrs in G.nodes(data=True):
+        attributes.append(tuple(attrs[aname] for aname in attr_names))
+    return attributes
+
+
+def get_mlti_dim_edge_attrs(G, attr_names):
+    attributes = []
+    for ed, attrs in G.edges(data=True):
+        attributes.append(tuple(attrs[aname] for aname in attr_names))
+    return attributes
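
Editor's note: a quick sanity check of the path helpers above (illustrative, not part of the diff). With find_all_paths in scope, each undirected path is kept in only one of its two orientations:

    import networkx as nx

    G = nx.path_graph(3)  # nodes 0 - 1 - 2
    print(find_all_paths(G, 1, False))  # two 1-edge paths, one orientation each
    print(find_all_paths(G, 2, False))  # the single 2-edge path, one orientation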