@@ -28,6 +28,7 @@ dslist = [ | |||
# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb | |||
# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb | |||
# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb | |||
# {'name': 'monoterpenoides', 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}, # node/edge symb | |||
# | |||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
# # node/edge symb | |||
@@ -57,7 +58,7 @@ estimator = marginalizedkernel | |||
#param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.3, 3), | |||
# 'n_iteration': np.linspace(1, 1, 1), | |||
param_grid_precomputed = {'p_quit': np.linspace(0.1, 0.9, 9), | |||
'n_iteration': np.linspace(5, 20, 4), | |||
'n_iteration': np.linspace(1, 19, 7), | |||
'remove_totters': [False]} | |||
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, | |||
{'alpha': np.logspace(-10, 10, num=41, base=10)}] | |||
@@ -24,6 +24,9 @@ dslist = [ | |||
# {'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb | |||
# {'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb | |||
# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb | |||
# {'name': 'monoterpenoides', 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}, # node/edge | |||
# {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt'}, | |||
# # node nsymb symb | |||
# | |||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
# # node/edge symb | |||
@@ -30,6 +30,8 @@ dslist = [ | |||
# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# # node symb/nsymb | |||
# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb | |||
# {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt'}, | |||
# # node nsymb symb | |||
# | |||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
# # node/edge symb | |||
@@ -26,6 +26,7 @@ dslist = [ | |||
{'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb | |||
{'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb | |||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||
# {'name': 'monoterpenoides', 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}, # node/edge symb | |||
# node symb/nsymb | |||
# {'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb | |||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||
@@ -27,7 +27,8 @@ dslist = [ | |||
{'name': 'AIDS', 'dataset': '../datasets/AIDS/AIDS_A.txt'}, # node symb/nsymb, edge symb | |||
{'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb | |||
{'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb | |||
{'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb | |||
{'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb | |||
# {'name': 'monoterpenoides', 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}, # node/edge symb | |||
# | |||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
# # node/edge symb | |||
@@ -54,11 +55,11 @@ dslist = [ | |||
# {'name': 'PTC_MR', 'dataset': '../datasets/PTC/Train/MR.ds',}, | |||
] | |||
estimator = untilhpathkernel | |||
param_grid_precomputed = {'depth': np.linspace(3, 10, 8), # [2], | |||
'k_func': [None]} # ['MinMax', 'tanimoto'], | |||
#param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2], | |||
# 'k_func': ['MinMax'], # ['MinMax', 'tanimoto'], | |||
# 'compute_method': ['trie']} # ['MinMax']} | |||
#param_grid_precomputed = {'depth': np.linspace(3, 10, 8), # [2], | |||
# 'k_func': [None]} # ['MinMax', 'tanimoto'], | |||
param_grid_precomputed = {'depth': np.linspace(1, 10, 10), # [2], | |||
'k_func': ['MinMax', 'tanimoto'], # ['MinMax'], # | |||
'compute_method': ['trie']} # ['MinMax']} | |||
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, | |||
{'alpha': np.logspace(-10, 10, num=41, base=10)}] | |||
@@ -30,6 +30,8 @@ dslist = [ | |||
{'name': 'NCI1', 'dataset': '../datasets/NCI1/NCI1_A.txt'}, # node symb | |||
{'name': 'NCI109', 'dataset': '../datasets/NCI109/NCI109_A.txt'}, # node symb | |||
{'name': 'D&D', 'dataset': '../datasets/DD/DD_A.txt'}, # node symb | |||
# {'name': 'monoterpenoides', 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}, # node/edge symb | |||
# | |||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||
# # node/edge symb | |||
@@ -7,7 +7,7 @@ Created on Wed Oct 16 14:20:06 2019 | |||
""" | |||
import numpy as np | |||
from tqdm import tqdm | |||
from itertools import combinations_with_replacement | |||
from itertools import combinations_with_replacement, combinations | |||
import multiprocessing | |||
from multiprocessing import Pool | |||
from functools import partial | |||
@@ -22,110 +22,88 @@ import sys | |||
from ged import GED, get_nb_edit_operations | |||
from utils import kernel_distance_matrix | |||
def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4,
                               params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT',
                                           'method': 'IPFP', 'stabilizer': None},
                               init_costs=[3, 3, 1, 3, 3, 1],
                               parallel=True):
    """Fit graph edit costs so that GEDs approximate kernel-space distances.

    Starting from ``init_costs``, the function alternates between
    (1) computing the GEDs and the numbers of edit operations for every
    pair of distinct graphs with ``compute_geds`` and (2) re-estimating the
    edit costs from those counts with ``update_costs``.

    Parameters
    ----------
    Gn : list
        Graphs to fit on.
    node_label, edge_label, gkernel
        Forwarded unchanged to ``kernel_distance_matrix``.
    itr_max : int
        Number of cost-update iterations. With 0 the initial costs are
        returned unchanged.
    k : int
        Not used by this function.
    params_ged : dict
        Keyword arguments for the GED computation. A copy is taken and its
        ``'edit_cost_constant'`` entry is set before every call to
        ``compute_geds``; the dict passed in is never modified.
    init_costs : list
        Initial edit costs, ordered c_vi, c_vr, c_vs, c_ei, c_er, c_es
        (or parts of them).
    parallel : bool
        Forwarded to ``compute_geds``.

    Returns
    -------
    tuple
        ``(edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat,
        time_list, nb_cost_mat_list)``. The lists hold one entry for the
        initial costs followed by one entry per iteration.

    Raises
    ------
    ValueError
        If an updated edit cost is smaller than -1e-9.
    """
    # Work on copies: the default arguments are shared mutable objects and
    # a caller-supplied dict should not be changed behind the caller's back.
    params_ged = dict(params_ged)
    init_costs = list(init_costs)

    # compute distances in feature space.
    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
                                                gkernel=gkernel)
    # Flatten the strict upper triangle row by row (pairs i < j); the
    # diagonal (a graph against itself) is left out.
    n_graphs = len(dis_k_mat)
    dis_k_vec = np.array([dis_k_mat[i, j]
                          for i in range(n_graphs)
                          for j in range(i + 1, n_graphs)])

    def _residual(ged_vec):
        # Euclidean norm of the difference between GEDs and kernel distances.
        return np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec)))

    # init ged.
    print('\ninitial:')
    time0 = time.time()
    params_ged['edit_cost_constant'] = init_costs
    ged_vec_init, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
                                                            parallel=parallel)
    residual_list = [_residual(ged_vec_init)]
    time_list = [time.time() - time0]
    edit_cost_list = [init_costs]
    nb_cost_mat = np.array(n_edit_operations)
    nb_cost_mat_list = [nb_cost_mat]
    print('edit_costs:', init_costs)
    print('residual_list:', residual_list)

    # Bound up front so the return value is defined even when itr_max == 0.
    edit_costs_new = init_costs
    for itr in range(itr_max):
        print('\niteration', itr)
        time0 = time.time()
        # "fit" geds to distances in feature space by tuning edit costs using
        # the Least Squares Method.
        edit_costs_new, _ = update_costs(nb_cost_mat, dis_k_vec)
        for idx in range(len(edit_costs_new)):
            if edit_costs_new[idx] < 0:
                # Tolerate tiny negative values (treated as numerical noise);
                # anything clearly negative is an error.
                if edit_costs_new[idx] > -1e-9:
                    edit_costs_new[idx] = 0
                else:
                    raise ValueError('The edit cost is negative.')

        # compute new GEDs and numbers of edit operations.
        params_ged['edit_cost_constant'] = edit_costs_new
        ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
                                                           parallel=parallel)
        residual_list.append(_residual(ged_vec))
        time_list.append(time.time() - time0)
        edit_cost_list.append(edit_costs_new)
        nb_cost_mat = np.array(n_edit_operations)
        nb_cost_mat_list.append(nb_cost_mat)
        print('edit_costs:', edit_costs_new)
        print('residual_list:', residual_list)

    return edit_costs_new, residual_list, edit_cost_list, dis_k_mat, ged_mat, \
        time_list, nb_cost_mat_list
def compute_geds(Gn, edit_cost_constant, idx_nonzeros, parallel=False): | |||
def compute_geds(Gn, params_ged, parallel=False): | |||
ged_mat = np.zeros((len(Gn), len(Gn))) | |||
if parallel: | |||
# print('parallel') | |||
len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | |||
ged_all = [0 for i in range(len_itr)] | |||
n_edit_operations = [[0 for i in range(len_itr)] for j in | |||
range(len(idx_nonzeros))] | |||
itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
# len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | |||
len_itr = int(len(Gn) * (len(Gn) - 1) / 2) | |||
ged_vec = [0 for i in range(len_itr)] | |||
n_edit_operations = [0 for i in range(len_itr)] | |||
# itr = combinations_with_replacement(range(0, len(Gn)), 2) | |||
itr = combinations(range(0, len(Gn)), 2) | |||
n_jobs = multiprocessing.cpu_count() | |||
if len_itr < 100 * n_jobs: | |||
chunksize = int(len_itr / n_jobs) + 1 | |||
@@ -134,68 +112,52 @@ def compute_geds(Gn, edit_cost_constant, idx_nonzeros, parallel=False): | |||
def init_worker(gn_toshare): | |||
global G_gn | |||
G_gn = gn_toshare | |||
do_partial = partial(_wrapper_compute_ged_parallel, edit_cost_constant, | |||
idx_nonzeros) | |||
do_partial = partial(_wrapper_compute_ged_parallel, params_ged) | |||
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,)) | |||
iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize), | |||
desc='computing GEDs', file=sys.stdout) | |||
# iterator = pool.imap_unordered(do_partial, itr, chunksize) | |||
for i, j, dis, n_eo_tmp in iterator: | |||
idx_itr = int(len(Gn) * i + j - i * (i + 1) / 2) | |||
ged_all[idx_itr] = dis | |||
idx_itr = int(len(Gn) * i + j - (i + 1) * (i + 2) / 2) | |||
ged_vec[idx_itr] = dis | |||
ged_mat[i][j] = dis | |||
ged_mat[j][i] = dis | |||
for idx, item in enumerate(idx_nonzeros): | |||
n_edit_operations[idx][idx_itr] = n_eo_tmp[item] | |||
n_edit_operations[idx_itr] = n_eo_tmp | |||
# print('\n-------------------------------------------') | |||
# print(i, j, idx_itr, dis) | |||
pool.close() | |||
pool.join() | |||
else: | |||
ged_all = [] | |||
n_edit_operations = [[] for i in range(len(idx_nonzeros))] | |||
ged_vec = [] | |||
n_edit_operations = [] | |||
for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout): | |||
# for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
# time0 = time.time() | |||
dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], lib='gedlibpy', | |||
cost='CONSTANT', method='IPFP', | |||
edit_cost_constant=edit_cost_constant, stabilizer='min', | |||
repeat=50) | |||
# time1 = time.time() - time0 | |||
# time0 = time.time() | |||
ged_all.append(dis) | |||
for j in range(i + 1, len(Gn)): | |||
dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], **params_ged) | |||
ged_vec.append(dis) | |||
ged_mat[i][j] = dis | |||
ged_mat[j][i] = dis | |||
n_eo_tmp = get_nb_edit_operations(Gn[i], Gn[j], pi_forward, pi_backward) | |||
for idx, item in enumerate(idx_nonzeros): | |||
n_edit_operations[idx].append(n_eo_tmp[item]) | |||
# time2 = time.time() - time0 | |||
# print(time1, time2, time1 / time2) | |||
n_edit_operations.append(n_eo_tmp) | |||
return ged_all, ged_mat, n_edit_operations | |||
return ged_vec, ged_mat, n_edit_operations | |||
def _wrapper_compute_ged_parallel(params_ged, itr):
    """Pool task: compute the GED for one pair of graph indices.

    ``itr`` holds the two indices of the pair; the graphs are looked up in
    the worker-global ``G_gn`` set by the pool initializer. Returns
    ``(i, j, dis, n_eo_tmp)`` so that the caller can place results that
    arrive out of order.
    """
    idx_g1, idx_g2 = itr[0], itr[1]
    result = _compute_ged_parallel(G_gn[idx_g1], G_gn[idx_g2], params_ged)
    dis, n_eo_tmp = result
    return idx_g1, idx_g2, dis, n_eo_tmp
def _compute_ged_parallel(g1, g2, params_ged):
    """Compute the GED between ``g1`` and ``g2`` and count the edit operations.

    ``params_ged`` is expanded into keyword arguments of ``GED``. Returns the
    distance together with the result of ``get_nb_edit_operations`` for the
    forward and backward maps produced by ``GED``.
    """
    ged_output = GED(g1, g2, **params_ged)
    dis, pi_forward, pi_backward = ged_output
    return dis, get_nb_edit_operations(g1, g2, pi_forward, pi_backward)
def compute_better_costs(nb_cost_mat, dis_k_vec): | |||
def update_costs(nb_cost_mat, dis_k_vec): | |||
# # method 1: simple least square method. | |||
# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, | |||
# rcond=None) | |||
@@ -203,7 +165,7 @@ def compute_better_costs(nb_cost_mat, dis_k_vec): | |||
# # method 2: least square method with x_i >= 0. | |||
# edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec) | |||
# method 3: solve as a quadratic program with constraints: x_i >= 0, sum(x) = 1. | |||
# method 3: solve as a quadratic program with constraints. | |||
# P = np.dot(nb_cost_mat.T, nb_cost_mat) | |||
# q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat) | |||
# G = -1 * np.identity(nb_cost_mat.shape[1]) | |||
@@ -221,7 +183,7 @@ def compute_better_costs(nb_cost_mat, dis_k_vec): | |||
# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) | |||
x = cp.Variable(nb_cost_mat.shape[1]) | |||
cost = cp.sum_squares(nb_cost_mat * x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])], | |||
constraints = [x >= [0.0001 for i in range(nb_cost_mat.shape[1])], | |||
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
@@ -13,29 +13,30 @@ import multiprocessing | |||
from multiprocessing import Pool | |||
from functools import partial | |||
from gedlibpy import librariesImport, gedlibpy | |||
from gedlibpy_linlin import librariesImport, gedlibpy | |||
def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', | |||
edit_cost_constant=[], stabilizer='min', repeat=50): | |||
edit_cost_constant=[], algo_options='', stabilizer='min', repeat=50): | |||
""" | |||
Compute GED for 2 graphs. | |||
""" | |||
if lib == 'gedlibpy': | |||
def convertGraph(G): | |||
"""Convert a graph to the proper NetworkX format that can be | |||
recognized by library gedlibpy. | |||
""" | |||
G_new = nx.Graph() | |||
for nd, attrs in G.nodes(data=True): | |||
G_new.add_node(str(nd), chem=attrs['atom']) | |||
def convertGraph(G): | |||
"""Convert a graph to the proper NetworkX format that can be | |||
recognized by library gedlibpy. | |||
""" | |||
G_new = nx.Graph() | |||
for nd, attrs in G.nodes(data=True): | |||
G_new.add_node(str(nd), chem=attrs['atom']) | |||
# G_new.add_node(str(nd), x=str(attrs['attributes'][0]), | |||
# y=str(attrs['attributes'][1])) | |||
for nd1, nd2, attrs in G.edges(data=True): | |||
G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) | |||
# G_new.add_edge(str(nd1), str(nd2)) | |||
return G_new | |||
for nd1, nd2, attrs in G.edges(data=True): | |||
# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) | |||
G_new.add_edge(str(nd1), str(nd2)) | |||
return G_new | |||
if lib == 'gedlibpy': | |||
gedlibpy.restart_env() | |||
gedlibpy.add_nx_graph(convertGraph(g1), "") | |||
gedlibpy.add_nx_graph(convertGraph(g2), "") | |||
@@ -43,12 +44,12 @@ def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', | |||
listID = gedlibpy.get_all_graph_ids() | |||
gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant) | |||
gedlibpy.init() | |||
gedlibpy.set_method(method, "") | |||
gedlibpy.set_method(method, algo_options) | |||
gedlibpy.init_method() | |||
g = listID[0] | |||
h = listID[1] | |||
if stabilizer == None: | |||
if stabilizer is None: | |||
gedlibpy.run_method(g, h) | |||
pi_forward = gedlibpy.get_forward_map(g, h) | |||
pi_backward = gedlibpy.get_backward_map(g, h) | |||
@@ -107,13 +108,57 @@ def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', | |||
dis = upper | |||
# make the map label correct (label remove map as np.inf) | |||
nodes1 = [n for n in g1.nodes()] | |||
nodes2 = [n for n in g2.nodes()] | |||
nb1 = nx.number_of_nodes(g1) | |||
nb2 = nx.number_of_nodes(g2) | |||
pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] | |||
pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] | |||
elif lib == 'gedlib-bash': | |||
import time | |||
import random | |||
import sys | |||
import os | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import saveDataset | |||
tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/' | |||
if not os.path.exists(tmp_dir): | |||
os.makedirs(tmp_dir) | |||
fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9)) | |||
xparams = {'method': 'gedlib', 'graph_dir': fn_collection} | |||
saveDataset([g1, g2], ['dummy', 'dummy'], gformat='gxl', group='xml', | |||
filename=fn_collection, xparams=xparams) | |||
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n' | |||
command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' | |||
command += 'export LD_LIBRARY_PATH\n' | |||
command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n' | |||
command += './ged_for_python_bash monoterpenoides ' + fn_collection \ | |||
+ ' \'' + algo_options + '\' ' | |||
for ec in edit_cost_constant: | |||
command += str(ec) + ' ' | |||
# output = os.system(command) | |||
stream = os.popen(command) | |||
output = stream.readlines() | |||
# print(output) | |||
dis = float(output[0].strip()) | |||
runtime = float(output[1].strip()) | |||
size_forward = int(output[2].strip()) | |||
pi_forward = [int(item.strip()) for item in output[3:3+size_forward]] | |||
pi_backward = [int(item.strip()) for item in output[3+size_forward:]] | |||
# print(dis) | |||
# print(runtime) | |||
# print(size_forward) | |||
# print(pi_forward) | |||
# print(pi_backward) | |||
# make the map label correct (label remove map as np.inf) | |||
nodes1 = [n for n in g1.nodes()] | |||
nodes2 = [n for n in g2.nodes()] | |||
nb1 = nx.number_of_nodes(g1) | |||
nb2 = nx.number_of_nodes(g2) | |||
pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] | |||
pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] | |||
# print(pi_forward) | |||
return dis, pi_forward, pi_backward | |||
@@ -149,7 +194,7 @@ def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP', | |||
g = listID[0] | |||
h = listID[1] | |||
if stabilizer == None: | |||
if stabilizer is None: | |||
gedlibpy.run_method(g, h) | |||
pi_forward = gedlibpy.get_forward_map(g, h) | |||
pi_backward = gedlibpy.get_backward_map(g, h) | |||
@@ -183,7 +228,8 @@ def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP', | |||
def ged_median(Gn, Gn_median, verbose=False, params_ged={'lib': 'gedlibpy', | |||
'cost': 'CHEM_1', 'method': 'IPFP', 'edit_cost_constant': [], | |||
'stabilizer': 'min', 'repeat': 50}, parallel=False): | |||
'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1', | |||
'stabilizer': None}, parallel=False): | |||
if parallel: | |||
len_itr = int(len(Gn)) | |||
pi_forward_list = [[] for i in range(len_itr)] | |||
@@ -23,7 +23,8 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
connected=False, removeNodes=True, allBestInit=False, allBestNodes=False, | |||
allBestEdges=False, allBestOutput=False, | |||
params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP', | |||
'edit_cost_constant': [], 'stabilizer': 'min', 'repeat': 50}): | |||
'edit_cost_constant': [], 'stabilizer': None, | |||
'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'}): | |||
"""See my name, then you know what I do. | |||
""" | |||
# Gn_median = Gn_median[0:10] | |||
@@ -435,6 +436,62 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
return G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median | |||
def iam_bash(Gn_names, edit_cost_constant, dataset='monoterpenoides',
             graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/py-graph/datasets/monoterpenoides/'):
    """Compute the iam by c++ implementation (gedlib) through bash.

    A GraphCollection XML file listing ``Gn_names`` is written to a
    temporary location, then the ``iam_for_python_bash`` executable is run
    through a shell and the first two lines of its output are parsed.

    Parameters
    ----------
    Gn_names : list of str
        Graph file names written into the collection file.
    edit_cost_constant : iterable or None
        Edit costs appended to the command line, or ``None`` to pass the
        literal word ``None``.
    dataset : str
        Dataset name passed to the executable.
    graph_dir : str
        Graph directory passed to the executable.

    Returns
    -------
    tuple
        ``(sod_sm, sod_gm, fname_sm, fname_gm)``: the two floats read from
        the first two output lines, and the fixed paths of the set-median
        and generalized-median GXL files.

    Notes
    -----
    The command is assembled by string concatenation and run through a
    shell; only pass trusted values for ``dataset``, ``graph_dir`` and
    ``edit_cost_constant``.
    """
    import os
    import random
    import time

    def createCollectionFile(Gn_names, y, filename):
        """Create collection file.
        """
        dirname_ds = os.path.dirname(filename)
        if dirname_ds != '':
            os.makedirs(dirname_ds + '/', exist_ok=True)

        # The context manager closes the file; no explicit close() needed.
        with open(filename + '.xml', 'w') as fgroup:
            fgroup.write("<?xml version=\"1.0\"?>")
            fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">")
            fgroup.write("\n<GraphCollection>")
            for idx, fname in enumerate(Gn_names):
                fgroup.write("\n\t<graph file=\"" + fname + "\" class=\"" + str(y[idx]) + "\"/>")
            fgroup.write("\n</GraphCollection>")

    tmp_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/'
    # Integer bound: float arguments to randint are deprecated and rejected
    # by recent Python versions.
    fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 10 ** 9))
    createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection)
#    graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl'

    command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n'
    command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
    command += 'export LD_LIBRARY_PATH\n'
    command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n'
    command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \
        + ' \'' + graph_dir + '\' '
    if edit_cost_constant is None:
        command += 'None'
    else:
        for ec in edit_cost_constant:
            command += str(ec) + ' '

    # Close the pipe once all output has been read.
    with os.popen(command) as stream:
        output = stream.readlines()

    sod_sm = float(output[0].strip())
    sod_gm = float(output[1].strip())

    fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
    fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'

    return sod_sm, sod_gm, fname_sm, fname_gm
############################################################################### | |||
# Old implementations. | |||
@@ -16,6 +16,319 @@ from utils import remove_edges | |||
from fitDistance import fit_GED_to_kernel_distance | |||
from utils import normalize_distance_matrix | |||
def median_paper_clcpc_python_best():
    """c_vs <= c_vi + c_vr, c_es <= c_ei + c_er with ged computation through
    the gedlibpy binding (``params_ged['lib'] == 'gedlibpy'``).

    For every class label in ``y_all`` and every repeat, the corresponding
    graph collection is loaded, edit costs are fitted with
    ``fit_GED_to_kernel_distance``, the results are saved to an ``.npz``
    file, and the fitted costs are appended as one line to the edit-costs
    text file. The pairwise substitution consistence of the kernel distance
    matrix and the GED matrix is printed afterwards.
    """
#    ds = {'name': 'monoterpenoides',
#          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
#    _, y_all = loadDataset(ds['dataset'])
    gkernel = 'untilhpathkernel'
    node_label = 'atom'
    edge_label = 'bond_type'
    itr_max = 6
    algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
    params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
                  'algo_options': algo_options, 'stabilizer': None}

    y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
    repeats = 50
    collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
    graph_dir = collection_path + 'gxl/'

    fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt'

    for y in y_all:
        for repeat in range(repeats):
            # The file is still opened before the fit (as before), but the
            # context manager guarantees it is closed if the fit raises.
            with open(fn_edit_costs_output, 'a') as edit_costs_output_file:
                collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
                Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
                edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
                    nb_cost_mat_list = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
                                                                  gkernel, itr_max, params_ged=params_ged,
                                                                  parallel=True)
                total_time = np.sum(time_list)
#                print('\nedit_costs:', edit_costs)
#                print('\nresidual_list:', residual_list)
#                print('\nedit_cost_list:', edit_cost_list)
#                print('\ndistance matrix in kernel space:', dis_k_mat)
#                print('\nged matrix:', ged_mat)
#                print('\ntotal time:', total_time)
#                print('\nnb_cost_mat:', nb_cost_mat_list[-1])
                np.savez('results/median_paper/fit_distance.clcpc.python_init40.monot.elabeled.uhpkernel.y'
                         + y + '.repeat' + str(repeat) + '.k10..gm',
                         edit_costs=edit_costs,
                         residual_list=residual_list, edit_cost_list=edit_cost_list,
                         dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
                         total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)

                # One line per fit: every cost followed by a single space.
                for ec in edit_costs:
                    edit_costs_output_file.write(str(ec) + ' ')
                edit_costs_output_file.write('\n')

#            # normalized distance matrices.
#            gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz')
#            edit_costs = gmfile['edit_costs']
#            residual_list = gmfile['residual_list']
#            edit_cost_list = gmfile['edit_cost_list']
#            dis_k_mat = gmfile['dis_k_mat']
#            ged_mat = gmfile['ged_mat']
#            total_time = gmfile['total_time']
#            nb_cost_mat_list = gmfile['nb_cost_mat_list']

            nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
            print(nb_consistent, nb_inconsistent, ratio_consistent)

#            norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
#            plt.imshow(norm_dis_k_mat)
#            plt.colorbar()
#            plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
#                        + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
#            plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
#                        + y + '.repeat' + str(repeat) + '.png', format='png')
#            # plt.show()
#            plt.clf()
#
#            norm_ged_mat = normalize_distance_matrix(ged_mat)
#            plt.imshow(norm_ged_mat)
#            plt.colorbar()
#            plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
#                        + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
#            plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_best.monot.elabeled.uhpkernel.y'
#                        + y + '.repeat' + str(repeat) + '.png', format='png')
#            # plt.show()
#            plt.clf()
#
#            norm_diff = norm_ged_mat - norm_dis_k_mat
#            plt.imshow(norm_diff)
#            plt.colorbar()
#            plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_best.monot.elabeled.uhpkernel.y'
#                        + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
#            plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_best.monot.elabeled.uhpkernel.y'
#                        + y + '.repeat' + str(repeat) + '.png', format='png')
#            # plt.show()
#            plt.clf()
#            # draw_count_bar(norm_diff)
def median_paper_clcpc_python_bash_cpp():
    """Fit GED edit costs to kernel distances on the generated monoterpenoides sets.

    Constraints noted for this experiment: c_vs <= c_vi + c_vr and
    c_es <= c_ei + c_er, with GED computed by python invoking the c++ code
    through a bash command (with updated library).

    For every class label in ``y_all`` and every repeat index the function:

    1. loads the graph collection ``monoterpenoides_<y>_<repeat>.xml``,
    2. calls ``fit_GED_to_kernel_distance`` on it,
    3. stores every returned value (plus the summed time) with ``np.savez``,
    4. appends the fitted edit costs as one line to
       ``results/median_paper/edit_costs_output.txt``,
    5. prints the result of ``pairwise_substitution_consistence``.

    Returns:
        None. All results go to disk or stdout.
    """
    gkernel = 'untilhpathkernel'
    node_label = 'atom'
    edge_label = 'bond_type'
    itr_max = 20
    algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
    params_ged = {'lib': 'gedlib-bash', 'cost': 'CONSTANT', 'method': 'IPFP',
                  'algo_options': algo_options}

    y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
    repeats = 50
    collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
    graph_dir = collection_path + 'gxl/'

    fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt'

    for y in y_all:
        for repeat in range(repeats):
            # The output file is opened (append mode) before the expensive
            # fit so an unwritable path fails fast; the context manager
            # guarantees the handle is closed even if loading, fitting or
            # saving raises.
            with open(fn_edit_costs_output, 'a') as edit_costs_output_file:
                collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
                Gn, _ = loadDataset(collection_file, extra_params=graph_dir)

                edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
                    nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(
                        Gn, node_label, edge_label, gkernel, itr_max,
                        params_ged=params_ged, parallel=False)
                total_time = np.sum(time_list)

                # np.savez appends '.npz' to the given file name.
                np.savez('results/median_paper/fit_distance.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
                         + y + '.repeat' + str(repeat) + '.gm',
                         edit_costs=edit_costs,
                         residual_list=residual_list, edit_cost_list=edit_cost_list,
                         dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
                         total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
                         coef_dk=coef_dk)

                # One line per (class, repeat): each cost followed by a
                # single space, then a newline (same layout as before).
                edit_costs_output_file.write(
                    ''.join(str(ec) + ' ' for ec in edit_costs) + '\n')

            nb_consistent, nb_inconsistent, ratio_consistent = \
                pairwise_substitution_consistence(dis_k_mat, ged_mat)
            print(nb_consistent, nb_inconsistent, ratio_consistent)
def test_cs_leq_ci_plus_cr_python_bash_cpp():
    """Fit edit costs on the monoterpenoides dataset and plot the distance matrices.

    Constraints noted for this experiment: c_vs <= c_vi + c_vr and
    c_es <= c_ei + c_er, with GED computed by python invoking the c++ code
    through a bash command (with updated library).

    The function loads the dataset, runs ``fit_GED_to_kernel_distance``,
    prints and saves its outputs, prints the result of
    ``pairwise_substitution_consistence`` and finally writes three heat maps
    (normalized kernel distances, normalized GEDs and their difference) as
    both ``.eps`` and ``.png`` files under ``results/``.
    """
    dataset_info = {'name': 'monoterpenoides',
                    'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
    Gn, y_all = loadDataset(dataset_info['dataset'])

    ged_settings = {
        'lib': 'gedlib-bash',
        'cost': 'CONSTANT',
        'method': 'IPFP',
        'algo_options': '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5',
    }

    # Positional arguments: graphs, node label, edge label, kernel name,
    # maximum number of iterations.
    fit_outputs = fit_GED_to_kernel_distance(
        Gn, 'atom', 'bond_type', 'untilhpathkernel', 10,
        params_ged=ged_settings, parallel=False)
    (edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat,
     time_list, nb_cost_mat_list, coef_dk) = fit_outputs
    total_time = np.sum(time_list)

    report = [
        ('\nedit_costs:', edit_costs),
        ('\nresidual_list:', residual_list),
        ('\nedit_cost_list:', edit_cost_list),
        ('\ndistance matrix in kernel space:', dis_k_mat),
        ('\nged matrix:', ged_mat),
        ('\ntotal time:', total_time),
        ('\nnb_cost_mat:', nb_cost_mat_list[-1]),
    ]
    for caption, value in report:
        print(caption, value)

    # np.savez appends '.npz' to the given file name.
    np.savez('results/fit_distance.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel.gm',
             edit_costs=edit_costs,
             residual_list=residual_list, edit_cost_list=edit_cost_list,
             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
             total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
             coef_dk=coef_dk)

    consistency = pairwise_substitution_consistence(dis_k_mat, ged_mat)
    nb_consistent, nb_inconsistent, ratio_consistent = consistency
    print(nb_consistent, nb_inconsistent, ratio_consistent)

    def _save_heatmap(matrix, stem):
        # Draw the matrix with a colour bar, save it as eps and png under
        # the given file stem, then clear the figure for the next plot.
        plt.imshow(matrix)
        plt.colorbar()
        plt.savefig(stem + '.eps', format='eps', dpi=300)
        plt.savefig(stem + '.png', format='png')
        plt.clf()

    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
    _save_heatmap(
        norm_dis_k_mat,
        'results/norm_dis_k_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel')

    norm_ged_mat = normalize_distance_matrix(ged_mat)
    _save_heatmap(
        norm_ged_mat,
        'results/norm_ged_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel')

    norm_diff = norm_ged_mat - norm_dis_k_mat
    _save_heatmap(
        norm_diff,
        'results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel')
def test_anycosts(): | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
@@ -295,8 +608,12 @@ def draw_count_bar(norm_diff): | |||
if __name__ == '__main__': | |||
# test_anycosts() | |||
test_cs_leq_ci_plus_cr() | |||
# test_cs_leq_ci_plus_cr() | |||
# test_unfitted() | |||
# test_cs_leq_ci_plus_cr_python_bash_cpp() | |||
# median_paper_clcpc_python_bash_cpp() | |||
median_paper_clcpc_python_best() | |||
# x = np.array([[1,2,3],[4,5,6],[7,8,9]]) | |||
# xx = pairwise_substitution(x) |
@@ -22,6 +22,130 @@ from iam import iam_upgraded | |||
from utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar | |||
#from ged import ged_median | |||
def test_iam_monoterpenoides_with_init40():
    """Run ``iam_upgraded`` on the generated monoterpenoides median sets.

    For each class label and each repeat index, the matching graph
    collection is loaded, ``iam_upgraded`` is called with the collection as
    both median set and candidate set (the candidates are copies), and the
    elapsed time and the two returned SOD values are recorded. Per-class
    values and their means are printed, followed by the means over all
    classes. GED is configured with 40 initial solutions.
    """
    gkernel = 'untilhpathkernel'  # not referenced below
    node_label = 'atom'
    edge_label = 'bond_type'

    # unfitted edit costs.
    c_vi, c_vr, c_vs = 3, 3, 1
    c_ei, c_er, c_es = 3, 3, 1

    # settings forwarded to iam_upgraded.
    ite_max_iam = 50
    epsilon_iam = 0.0001
    removeNodes = False
    connected_iam = False

    # GED settings.
    params_ged = {
        'lib': 'gedlibpy',
        'cost': 'CONSTANT',
        'method': 'IPFP',
        'edit_cost_constant': [c_vi, c_vr, c_vs, c_ei, c_er, c_es],
        'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1',
        'stabilizer': None,
    }

    collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
    graph_dir = collection_path + 'gxl/'
    y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
    repeats = 50

    # One entry per class in each of these lists (sod_list_list gets one
    # entry per run instead).
    time_list = []
    dis_ks_min_list = []
    dis_ks_set_median_list = []
    sod_gs_list = []
    g_best = []
    sod_set_median_list = []
    sod_list_list = []

    for y in y_all:
        print('\n-------------------------------------------------------')
        print('class of y:', y)

        class_times = []
        class_sods_set_median = []
        class_sods_gen_median = []
        class_best_graphs = []
        dis_ks_min_list.append([])
        dis_ks_set_median_list.append([])
        g_best.append(class_best_graphs)

        for repeat in range(repeats):
            # load median set.
            collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
            Gn_median, _ = loadDataset(collection_file, extra_params=graph_dir)
            Gn_candidate = [g.copy() for g in Gn_median]

            started = time.time()
            iam_outputs = iam_upgraded(
                Gn_median, Gn_candidate,
                c_ei=c_ei, c_er=c_er, c_es=c_es,
                ite_max=ite_max_iam, epsilon=epsilon_iam,
                node_label=node_label, edge_label=edge_label,
                connected=connected_iam, removeNodes=removeNodes,
                params_ged=params_ged)
            elapsed = time.time() - started
            (G_gen_median_list, sod_gen_median, sod_list,
             G_set_median_list, sod_set_median) = iam_outputs

            print('\ntime: ', elapsed)
            class_times.append(elapsed)
            class_best_graphs.append(G_gen_median_list[0])
            class_sods_set_median.append(sod_set_median)
            print('\nsmallest sod of the set median:', sod_set_median)
            class_sods_gen_median.append(sod_gen_median)
            print('\nsmallest sod in graph space:', sod_gen_median)
            sod_list_list.append(sod_list)

        print('\nsods of the set median for this class:', class_sods_set_median)
        print('\nsods in graph space for this class:', class_sods_gen_median)
        print('\ntimes for this class:', class_times)

        # Only the per-class means are kept for the final summary.
        sod_set_median_list.append(np.mean(class_sods_set_median))
        sod_gs_list.append(np.mean(class_sods_gen_median))
        time_list.append(np.mean(class_times))

    print()
    print('\nmean sods of the set median for each class:', sod_set_median_list)
    print('\nmean sods in graph space for each class:', sod_gs_list)
    print('\nmean times for each class:', time_list)

    print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
    print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
    print('\nmean times of all:', np.mean(time_list))
def test_iam_monoterpenoides(): | |||
ds = {'name': 'monoterpenoides', | |||
'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb | |||
@@ -834,9 +958,10 @@ if __name__ == '__main__': | |||
# tests on different numbers of median-sets. | |||
# test_iam_median_nb() | |||
# test_iam_letter_h() | |||
test_iam_monoterpenoides() | |||
# test_iam_monoterpenoides() | |||
# test_iam_mutag() | |||
# test_iam_fitdistance() | |||
# print("test log") | |||
test_iam_monoterpenoides_with_init40() |
@@ -17,8 +17,10 @@ from pygraph.kernels.marginalizedKernel import marginalizedkernel | |||
from pygraph.kernels.untilHPathKernel import untilhpathkernel | |||
from pygraph.kernels.spKernel import spkernel | |||
import functools | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct, polynomialkernel | |||
from pygraph.kernels.structuralspKernel import structuralspkernel | |||
from pygraph.kernels.treeletKernel import treeletkernel | |||
from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel | |||
def remove_edges(Gn): | |||
@@ -46,18 +48,29 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'untilhpathkernel': | |||
Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label, | |||
depth=10, k_func='MinMax', compute_method='trie', | |||
depth=7, k_func='MinMax', compute_method='trie', | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'spkernel': | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels= | |||
Kmatrix, _, _ = spkernel(Gn, node_label=node_label, node_kernels= | |||
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'structuralspkernel': | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels= | |||
Kmatrix, _ = structuralspkernel(Gn, node_label=node_label, node_kernels= | |||
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'treeletkernel': | |||
# pkernel = functools.partial(polynomialkernel, d=2, c=1e5) | |||
pkernel = functools.partial(gaussiankernel, gamma=1e-6) | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label, | |||
sub_kernel=pkernel, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'weisfeilerlehmankernel': | |||
Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label, | |||
height=4, base_kernel='subtree', | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
# normalization | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
@@ -79,7 +92,7 @@ def gram2distances(Kmatrix): | |||
def kernel_distance_matrix(Gn, node_label, edge_label, Kmatrix=None, gkernel=None): | |||
dis_mat = np.empty((len(Gn), len(Gn))) | |||
if Kmatrix == None: | |||
if Kmatrix is None: | |||
Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, True) | |||
for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
@@ -109,6 +122,21 @@ def get_same_item_indices(ls): | |||
return idx_dict | |||
def k_nearest_neighbors_to_median_in_kernel_space(Gn, Kmatrix=None, gkernel=None,
                                                  node_label=None, edge_label=None):
    """Compute the distance between g_star and each graph of ``Gn``.

    Every graph of ``Gn`` gets the same weight ``1 / len(Gn)``; the distance
    of each graph is obtained from ``dis_gstar`` using these weights and the
    Gram matrix.

    Args:
        Gn: list of graphs.
        Kmatrix: Gram matrix indexed like ``Gn``. When ``None`` it is
            computed with ``compute_kernel`` from the remaining arguments.
        gkernel: kernel name passed to ``compute_kernel`` (only used when
            ``Kmatrix`` is ``None``).
        node_label: node label name passed to ``compute_kernel``.
        edge_label: edge label name passed to ``compute_kernel``.

    Returns:
        list: one ``dis_gstar`` value per graph, in the order of ``Gn``.
        An empty list when ``Gn`` is empty.

    NOTE(review): despite the name, no nearest-neighbour selection happens
    here and there is no ``k`` parameter; callers must rank the returned
    distances themselves. The previous body referenced the undefined names
    ``idx_gi``, ``Gn_init`` and ``dis_all`` and returned nothing; they are
    taken here to mean "all indices of Gn", ``Gn`` and ``dis_k_all`` --
    confirm against the intended use.
    """
    nb_graphs = len(Gn)
    if nb_graphs == 0:
        # Nothing to measure; also avoids dividing by zero below.
        return []

    alpha = [1 / nb_graphs] * nb_graphs
    if Kmatrix is None:
        Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, True)

    # All graphs of Gn take part in the weighted combination, so their
    # indices in Kmatrix are simply 0..len(Gn)-1.
    idx_gi = list(range(nb_graphs))

    # term3 does not depend on the graph being measured; compute it once
    # and hand it to dis_gstar.
    term3 = 0
    for i1, a1 in enumerate(alpha):
        for i2, a2 in enumerate(alpha):
            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]

    dis_k_all = []  # distance between g_star and each graph.
    for ig in tqdm(range(nb_graphs), desc='computing distances', file=sys.stdout):
        dis_k_all.append(dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3))
    return dis_k_all
def normalize_distance_matrix(D): | |||
max_value = np.amax(D) | |||
min_value = np.amin(D) |
@@ -124,21 +124,21 @@ def saveGXL(graph, filename, method='benoit'): | |||
# reference: https://github.com/dbblumenthal/gedlib/blob/master/data/generate_molecules.py#L22 | |||
# pass | |||
gxl_file = open(filename, 'w') | |||
gxl_file.write("<?xml version=\"1.0\"?>\n") | |||
gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") | |||
gxl_file.write("<!DOCTYPE gxl SYSTEM \"http://www.gupro.de/GXL/gxl-1.0.dtd\">\n") | |||
gxl_file.write("<gxl>\n") | |||
gxl_file.write("<gxl xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n") | |||
gxl_file.write("<graph id=\"" + str(graph.graph['name']) + "\" edgeids=\"true\" edgemode=\"undirected\">\n") | |||
for v, attrs in graph.nodes(data=True): | |||
gxl_file.write("<node id=\"_" + str(v) + "\">") | |||
gxl_file.write("<attr name=\"" + "chem" + "\"><int>" + str(attrs['atom']) + "</int></attr>") | |||
gxl_file.write("<attr name=\"" + "chem" + "\"><int>" + str(attrs['chem']) + "</int></attr>") | |||
gxl_file.write("</node>\n") | |||
for v1, v2, attrs in graph.edges(data=True): | |||
gxl_file.write("<edge from=\"_" + str(v1) + "\" to=\"_" + str(v2) + "\">") | |||
# gxl_file.write("<attr name=\"valence\"><int>" + str(attrs['bond_type']) + "</int></attr>") | |||
gxl_file.write("<attr name=\"valence\"><int>" + "1" + "</int></attr>") | |||
gxl_file.write("<attr name=\"valence\"><int>" + str(attrs['valence']) + "</int></attr>") | |||
# gxl_file.write("<attr name=\"valence\"><int>" + "1" + "</int></attr>") | |||
gxl_file.write("</edge>\n") | |||
gxl_file.write("</graph>\n") | |||
gxl_file.write("</gxl>\n") | |||
gxl_file.write("</gxl>") | |||
gxl_file.close() | |||
elif method == 'gedlib-letter': | |||
# reference: https://github.com/dbblumenthal/gedlib/blob/master/data/generate_molecules.py#L22 | |||
@@ -147,15 +147,15 @@ def saveGXL(graph, filename, method='benoit'): | |||
gxl_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") | |||
gxl_file.write("<!DOCTYPE gxl SYSTEM \"http://www.gupro.de/GXL/gxl-1.0.dtd\">\n") | |||
gxl_file.write("<gxl xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n") | |||
gxl_file.write("<graph id=\"" + str(graph.graph['name']) + "\" edgeids=\"false\" edgemode=\"undirected\">") | |||
gxl_file.write("<graph id=\"" + str(graph.graph['name']) + "\" edgeids=\"false\" edgemode=\"undirected\">\n") | |||
for v, attrs in graph.nodes(data=True): | |||
gxl_file.write("<node id=\"_" + str(v) + "\">") | |||
gxl_file.write("<attr name=\"x\"><float>" + str(attrs['attributes'][0]) + "</float></attr>") | |||
gxl_file.write("<attr name=\"y\"><float>" + str(attrs['attributes'][1]) + "</float></attr>") | |||
gxl_file.write("</node>") | |||
gxl_file.write("</node>\n") | |||
for v1, v2, attrs in graph.edges(data=True): | |||
gxl_file.write("<edge from=\"_" + str(v1) + "\" to=\"_" + str(v2) + "\"/>") | |||
gxl_file.write("</graph>") | |||
gxl_file.write("<edge from=\"_" + str(v1) + "\" to=\"_" + str(v2) + "\"/>\n") | |||
gxl_file.write("</graph>\n") | |||
gxl_file.write("</gxl>") | |||
gxl_file.close() | |||
@@ -466,12 +466,15 @@ def loadDataset(filename, filename_y=None, extra_params=None): | |||
def loadFromXML(filename, extra_params): | |||
import xml.etree.ElementTree as ET | |||
dirname_dataset = dirname(filename) | |||
if extra_params: | |||
dirname_dataset = extra_params | |||
else: | |||
dirname_dataset = dirname(filename) | |||
tree = ET.parse(filename) | |||
root = tree.getroot() | |||
data = [] | |||
y = [] | |||
for graph in root.iter('print'): | |||
for graph in root.iter('graph'): | |||
mol_filename = graph.attrib['file'] | |||
mol_class = graph.attrib['class'] | |||
data.append(loadGXL(dirname_dataset + '/' + mol_filename)) | |||
@@ -541,15 +544,22 @@ def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=None | |||
dirname_ds += '/' | |||
if not os.path.exists(dirname_ds) : | |||
os.makedirs(dirname_ds) | |||
if 'graph_dir' in xparams: | |||
graph_dir = xparams['graph_dir'] + '/' | |||
if not os.path.exists(graph_dir): | |||
os.makedirs(graph_dir) | |||
else: | |||
graph_dir = dirname_ds | |||
if group == 'xml' and gformat == 'gxl': | |||
with open(filename + '.xml', 'w') as fgroup: | |||
fgroup.write("<?xml version=\"1.0\"?>") | |||
fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"https://dbblumenthal.github.io/gedlib/GraphCollection_8dtd_source.html\">") | |||
fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">") | |||
fgroup.write("\n<GraphCollection>") | |||
for idx, g in enumerate(Gn): | |||
fname_tmp = "graph" + str(idx) + ".gxl" | |||
saveGXL(g, dirname_ds + fname_tmp, method=xparams['method']) | |||
saveGXL(g, graph_dir + fname_tmp, method=xparams['method']) | |||
fgroup.write("\n\t<graph file=\"" + fname_tmp + "\" class=\"" + str(y[idx]) + "\"/>") | |||
fgroup.write("\n</GraphCollection>") | |||
fgroup.close() | |||
@@ -558,18 +568,18 @@ def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=None | |||
if __name__ == '__main__': | |||
# ### Load dataset from .ds file. | |||
# # .ct files. | |||
ds = {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', | |||
'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt'} | |||
Gn, y = loadDataset(ds['dataset'], filename_y=ds['dataset_y']) | |||
# ds = {'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds'} # node symb | |||
# Gn, y = loadDataset(ds['dataset']) | |||
# ds = {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds'} # node/edge symb | |||
# Gn, y = loadDataset(ds['dataset']) | |||
# ds = {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds'} # unlabeled | |||
# Gn, y = loadDataset(ds['dataset']) | |||
print(Gn[1].nodes(data=True)) | |||
print(Gn[1].edges(data=True)) | |||
print(y[1]) | |||
# ds = {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', | |||
# 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt'} | |||
# Gn, y = loadDataset(ds['dataset'], filename_y=ds['dataset_y']) | |||
## ds = {'name': 'Acyclic', 'dataset': '../../datasets/acyclic/dataset_bps.ds'} # node symb | |||
## Gn, y = loadDataset(ds['dataset']) | |||
## ds = {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds'} # node/edge symb | |||
## Gn, y = loadDataset(ds['dataset']) | |||
## ds = {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds'} # unlabeled | |||
## Gn, y = loadDataset(ds['dataset']) | |||
# print(Gn[1].nodes(data=True)) | |||
# print(Gn[1].edges(data=True)) | |||
# print(y[1]) | |||
# # .gxl file. | |||
# ds = {'name': 'monoterpenoides', | |||
@@ -579,6 +589,33 @@ if __name__ == '__main__': | |||
# print(Gn[1].edges(data=True)) | |||
# print(y[1]) | |||
### Convert graph from one format to another. | |||
# .gxl file. | |||
import networkx as nx | |||
ds = {'name': 'monoterpenoides', | |||
'dataset': '../../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb | |||
Gn, y = loadDataset(ds['dataset']) | |||
y = [int(i) for i in y] | |||
print(Gn[1].nodes(data=True)) | |||
print(Gn[1].edges(data=True)) | |||
print(y[1]) | |||
# Convert a graph to the proper NetworkX format that can be recognized by library gedlib. | |||
Gn_new = [] | |||
for G in Gn: | |||
G_new = nx.Graph() | |||
for nd, attrs in G.nodes(data=True): | |||
G_new.add_node(str(nd), chem=attrs['atom']) | |||
for nd1, nd2, attrs in G.edges(data=True): | |||
G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) | |||
# G_new.add_edge(str(nd1), str(nd2)) | |||
Gn_new.append(G_new) | |||
print(Gn_new[1].nodes(data=True)) | |||
print(Gn_new[1].edges(data=True)) | |||
print(Gn_new[1]) | |||
filename = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl/monoterpenoides' | |||
xparams = {'method': 'gedlib'} | |||
saveDataset(Gn, y, gformat='gxl', group='xml', filename=filename, xparams=xparams) | |||
# ds = {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat', | |||
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb | |||
# Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params']) |