@@ -15,24 +15,28 @@ import time | |||
import random | |||
from scipy import optimize | |||
from scipy.optimize import minimize | |||
import cvxpy as cp | |||
import sys | |||
#sys.path.insert(0, "../") | |||
from ged import GED, get_nb_edit_operations | |||
from utils import kernel_distance_matrix | |||
sys.path.insert(0, "../") | |||
from preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter | |||
from preimage.utils import kernel_distance_matrix | |||
def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4, | |||
def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, | |||
params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT', | |||
'method': 'IPFP', 'stabilizer': None}, | |||
init_costs=[3, 3, 1, 3, 3, 1], | |||
dataset='monoterpenoides', | |||
parallel=True): | |||
dataset = dataset.lower() | |||
# c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them. | |||
# random.seed(1) | |||
# cost_rdm = random.sample(range(1, 10), 6) | |||
# init_costs = cost_rdm + [0] | |||
# init_costs = cost_rdm | |||
init_costs = [3, 3, 1, 3, 3, 1] | |||
# init_costs = [3, 3, 1, 3, 3, 1] | |||
# init_costs = [i * 0.01 for i in cost_rdm] + [0] | |||
# init_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0] | |||
# init_costs = [0, 0, 0.9544, 0.026, 0.0196, 0] | |||
@@ -51,8 +55,10 @@ def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4 | |||
# init ged. | |||
print('\ninitial:') | |||
time0 = time.time() | |||
params_ged['dataset'] = dataset | |||
params_ged['edit_cost_constant'] = init_costs | |||
ged_vec_init, ged_mat, n_edit_operations = compute_geds(Gn, params_ged, | |||
dataset, | |||
parallel=parallel) | |||
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] | |||
time_list = [time.time() - time0] | |||
@@ -67,20 +73,21 @@ def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4 | |||
time0 = time.time() | |||
# "fit" geds to distances in feature space by tuning edit costs using the | |||
# Least Squares Method. | |||
edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec) | |||
edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec, | |||
dataset=dataset, cost=params_ged['cost']) | |||
for i in range(len(edit_costs_new)): | |||
if -1e-9 <= edit_costs_new[i] <= 1e-9: | |||
edit_costs_new[i] = 0 | |||
if edit_costs_new[i] < 0: | |||
if edit_costs_new[i] > -1e-9: | |||
edit_costs_new[i] = 0 | |||
else: | |||
raise ValueError('The edit cost is negative.') | |||
raise ValueError('The edit cost is negative.') | |||
# for i in range(len(edit_costs_new)): | |||
# if edit_costs_new[i] < 0: | |||
# edit_costs_new[i] = 0 | |||
# compute new GEDs and numbers of edit operations. | |||
params_ged['edit_cost_constant'] = edit_costs_new | |||
ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged, | |||
params_ged['edit_cost_constant'] = edit_costs_new # np.array([edit_costs_new[0], edit_costs_new[1], 0.75]) | |||
ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged, | |||
dataset, | |||
parallel=parallel) | |||
residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec)))) | |||
time_list.append(time.time() - time0) | |||
@@ -94,7 +101,8 @@ def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4 | |||
time_list, nb_cost_mat_list | |||
def compute_geds(Gn, params_ged, parallel=False): | |||
def compute_geds(Gn, params_ged, dataset, parallel=False): | |||
get_nb_eo = get_nb_edit_operations_letter if dataset == 'letter' else get_nb_edit_operations | |||
ged_mat = np.zeros((len(Gn), len(Gn))) | |||
if parallel: | |||
# print('parallel') | |||
@@ -112,7 +120,7 @@ def compute_geds(Gn, params_ged, parallel=False): | |||
def init_worker(gn_toshare): | |||
global G_gn | |||
G_gn = gn_toshare | |||
do_partial = partial(_wrapper_compute_ged_parallel, params_ged) | |||
do_partial = partial(_wrapper_compute_ged_parallel, params_ged, get_nb_eo) | |||
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,)) | |||
iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize), | |||
desc='computing GEDs', file=sys.stdout) | |||
@@ -138,26 +146,146 @@ def compute_geds(Gn, params_ged, parallel=False): | |||
ged_vec.append(dis) | |||
ged_mat[i][j] = dis | |||
ged_mat[j][i] = dis | |||
n_eo_tmp = get_nb_edit_operations(Gn[i], Gn[j], pi_forward, pi_backward) | |||
n_eo_tmp = get_nb_eo(Gn[i], Gn[j], pi_forward, pi_backward) | |||
n_edit_operations.append(n_eo_tmp) | |||
return ged_vec, ged_mat, n_edit_operations | |||
# NOTE(review): this span is diff residue -- each changed line appears twice,
# the pre-change form first and the post-change form directly after it.
#
# Worker entry point used with the multiprocessing Pool in compute_geds.
# `itr` is an (i, j) pair of indices into G_gn, the module-global graph list
# that init_worker installs in each worker process.
# The post-change signature additionally receives `get_nb_eo`, the
# edit-operation counting function bound via functools.partial in
# compute_geds, and forwards it to _compute_ged_parallel.
def _wrapper_compute_ged_parallel(params_ged, itr): | |||
def _wrapper_compute_ged_parallel(params_ged, get_nb_eo, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged) | |||
dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged, get_nb_eo) | |||
# The indices are returned with the result; presumably needed because the
# pool is consumed with imap_unordered, so results arrive out of order.
return i, j, dis, n_eo_tmp | |||
# NOTE(review): this span is diff residue -- each changed line appears twice,
# the pre-change form first and the post-change form directly after it.
#
# Compute the graph edit distance between g1 and g2 and count the edit
# operations implied by the resulting node maps.
# `params_ged` is expanded as keyword arguments of GED().
# Returns (dis, n_eo_tmp): the distance and the tuple of operation counts.
def _compute_ged_parallel(g1, g2, params_ged): | |||
def _compute_ged_parallel(g1, g2, params_ged, get_nb_eo): | |||
dis, pi_forward, pi_backward = GED(g1, g2, **params_ged) | |||
# Pre-change: the counter is hard-wired to get_nb_edit_operations.
n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward) | |||
# Post-change: the counter is injected by the caller. The two counters seen
# in this diff return different tuples: get_nb_edit_operations gives
# (n_vi, n_vr, n_vs, n_ei, n_er, n_es), while get_nb_edit_operations_letter
# gives (n_vi, n_vr, n_vs, sod_vs, n_ei, n_er).
# NOTE(review): the trailing "[0,0,0,0,0,0]" looks like a leftover debugging
# stub value -- confirm before relying on it.
n_eo_tmp = get_nb_eo(g1, g2, pi_forward, pi_backward) # [0,0,0,0,0,0] | |||
return dis, n_eo_tmp | |||
def update_costs(nb_cost_mat, dis_k_vec): | |||
def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides', | |||
cost='CONSTANT', rw_constraints='2constraints'): | |||
if dataset.lower() == 'letter': | |||
if cost == 'LETTER': | |||
pass | |||
# # method 1: set alpha automatically, just tune c_vir and c_eir by | |||
# # LMS using cvxpy. | |||
# alpha = 0.5 | |||
# coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec) | |||
## if np.count_nonzero(nb_cost_mat[:,4]) == 0: | |||
## alpha = 0.75 | |||
## else: | |||
## alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0]) | |||
## alpha = alpha * 0.99 | |||
# param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1]) | |||
# param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5]) | |||
# nb_cost_mat_new = np.column_stack((param_vir, param_eir)) | |||
# dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3] | |||
# | |||
# x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
# cost = cp.sum_squares(nb_cost_mat_new * x - dis_new) | |||
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] | |||
# prob = cp.Problem(cp.Minimize(cost), constraints) | |||
# prob.solve() | |||
# edit_costs_new = x.value | |||
# edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha]) | |||
# residual = np.sqrt(prob.value) | |||
# # method 2: tune c_vir, c_eir and alpha by nonlinear programming by | |||
# # scipy.optimize.minimize. | |||
# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1] | |||
# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5] | |||
# w2 = nb_cost_mat[:,3] | |||
# w3 = dis_k_vec | |||
# func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \ | |||
# + w2 * x[2] - w3 * x[3]) ** 2) | |||
# bounds = ((0, None), (0., None), (0.5, 0.5), (0, None)) | |||
# res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds) | |||
# edit_costs_new = res.x[0:3] | |||
# residual = res.fun | |||
# method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy. | |||
# # method 4: tune c_vir, c_eir and alpha by QP function | |||
# # scipy.optimize.least_squares. An initial guess is required. | |||
# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1] | |||
# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5] | |||
# w2 = nb_cost_mat[:,3] | |||
# w3 = dis_k_vec | |||
# func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \ | |||
# + w2 * x[2] - w3 * x[3]) ** 2 | |||
# res = optimize.root(func, [0.9, 1.7, 0.75, 100]) | |||
# edit_costs_new = res.x | |||
# residual = None | |||
elif cost == 'LETTER2': | |||
# # 1. if c_vi != c_vr, c_ei != c_er. | |||
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
# x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||
## # 1.1 no constraints. | |||
## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] | |||
# # 1.2 c_vs <= c_vi + c_vr. | |||
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
## # 2. if c_vi == c_vr, c_ei == c_er. | |||
## nb_cost_mat_new = nb_cost_mat[:,[0,3,4]] | |||
## nb_cost_mat_new[:,0] += nb_cost_mat[:,1] | |||
## nb_cost_mat_new[:,2] += nb_cost_mat[:,5] | |||
## x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
## cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||
## # 2.1 no constraints. | |||
## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] | |||
### # 2.2 c_vs <= c_vi + c_vr. | |||
### constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
### np.array([2.0, -1.0, 0.0]).T@x >= 0.0] | |||
# | |||
# prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
# prob.solve() | |||
# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] | |||
# edit_costs_new = np.array(edit_costs_new) | |||
# residual = np.sqrt(prob.value) | |||
if rw_constraints == 'inequality': | |||
# c_vs <= c_vi + c_vr. | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
prob.solve() | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif rw_constraints == '2constraints': | |||
# c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er. | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
prob.solve() | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
# elif method == 'inequality_modified': | |||
# # c_vs <= c_vi + c_vr. | |||
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
# x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) | |||
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
# prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
# prob.solve() | |||
# # use same costs for insertion and removal rather than the fitted costs. | |||
# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] | |||
# edit_costs_new = np.array(edit_costs_new) | |||
# residual = np.sqrt(prob.value) | |||
else: | |||
# # method 1: simple least square method. | |||
# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, | |||
# rcond=None) | |||
@@ -181,16 +309,16 @@ def update_costs(nb_cost_mat, dis_k_vec): | |||
# G = -1 * np.identity(nb_cost_mat.shape[1]) | |||
# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) | |||
x = cp.Variable(nb_cost_mat.shape[1]) | |||
cost = cp.sum_squares(nb_cost_mat * x - dis_k_vec) | |||
constraints = [x >= [0.0001 for i in range(nb_cost_mat.shape[1])], | |||
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost), constraints) | |||
prob.solve() | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
x = cp.Variable(nb_cost_mat.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], | |||
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
prob.solve() | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
# method 4: | |||
@@ -13,33 +13,46 @@ import multiprocessing | |||
from multiprocessing import Pool | |||
from functools import partial | |||
from gedlibpy_linlin import librariesImport, gedlibpy | |||
#from gedlibpy_linlin import librariesImport, gedlibpy | |||
from libs import * | |||
def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', | |||
def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method='IPFP', | |||
edit_cost_constant=[], algo_options='', stabilizer='min', repeat=50): | |||
""" | |||
Compute GED for 2 graphs. | |||
""" | |||
# NOTE(review): this span is diff residue -- the pre-change definition and
# body come first, followed by the post-change lines; indentation is lost.
#
# Nested helper of GED(): builds a fresh nx.Graph whose node ids are the
# str() of the original ids and whose labels are copied under the attribute
# names passed on to gedlibpy. The post-change form takes `dataset` to pick
# which attributes to copy.
def convertGraph(G): | |||
def convertGraph(G, dataset): | |||
"""Convert a graph to the proper NetworkX format that can be | |||
recognized by library gedlibpy. | |||
""" | |||
G_new = nx.Graph() | |||
# Pre-change body: always copies the node 'atom' label to 'chem' and adds
# edges without any label.
for nd, attrs in G.nodes(data=True): | |||
G_new.add_node(str(nd), chem=attrs['atom']) | |||
# G_new.add_node(str(nd), x=str(attrs['attributes'][0]), | |||
# y=str(attrs['attributes'][1])) | |||
for nd1, nd2, attrs in G.edges(data=True): | |||
# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) | |||
G_new.add_edge(str(nd1), str(nd2)) | |||
# Post-change body: attribute mapping depends on the dataset name.
# Node 'atom' -> 'chem', edge 'bond_type' -> 'valence'.
if dataset == 'monoterpenoides': | |||
for nd, attrs in G.nodes(data=True): | |||
G_new.add_node(str(nd), chem=attrs['atom']) | |||
for nd1, nd2, attrs in G.edges(data=True): | |||
G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) | |||
# 'letter': the first two entries of the node 'attributes' list are stored
# as strings under 'x' and 'y'; edges carry no label.
elif dataset == 'letter': | |||
for nd, attrs in G.nodes(data=True): | |||
G_new.add_node(str(nd), x=str(attrs['attributes'][0]), | |||
y=str(attrs['attributes'][1])) | |||
for nd1, nd2, attrs in G.edges(data=True): | |||
G_new.add_edge(str(nd1), str(nd2)) | |||
# NOTE(review): this fallback is line-for-line the same as the
# 'monoterpenoides' branch above -- the two could be merged.
else: | |||
for nd, attrs in G.nodes(data=True): | |||
G_new.add_node(str(nd), chem=attrs['atom']) | |||
for nd1, nd2, attrs in G.edges(data=True): | |||
G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) | |||
# G_new.add_edge(str(nd1), str(nd2)) | |||
return G_new | |||
dataset = dataset.lower() | |||
if lib == 'gedlibpy': | |||
gedlibpy.restart_env() | |||
gedlibpy.add_nx_graph(convertGraph(g1), "") | |||
gedlibpy.add_nx_graph(convertGraph(g2), "") | |||
gedlibpy.add_nx_graph(convertGraph(g1, dataset), "") | |||
gedlibpy.add_nx_graph(convertGraph(g2, dataset), "") | |||
listID = gedlibpy.get_all_graph_ids() | |||
gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant) | |||
@@ -320,6 +333,60 @@ def get_nb_edit_operations(g1, g2, forward_map, backward_map): | |||
# one of the nodes is removed, thus the edge is removed. | |||
if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf: | |||
n_er += 1 | |||
# corresponding edge is in g2. | |||
elif (forward_map[idx1], forward_map[idx2]) in g2.edges(): | |||
nb_edges2_cnted += 1 | |||
# edge labels are different. | |||
if g2.edges[((forward_map[idx1], forward_map[idx2]))]['bond_type'] \ | |||
!= g1.edges[(n1, n2)]['bond_type']: | |||
n_es += 1 | |||
elif (forward_map[idx2], forward_map[idx1]) in g2.edges(): | |||
nb_edges2_cnted += 1 | |||
# edge labels are different. | |||
if g2.edges[((forward_map[idx2], forward_map[idx1]))]['bond_type'] \ | |||
!= g1.edges[(n1, n2)]['bond_type']: | |||
n_es += 1 | |||
# corresponding nodes are in g2, however the edge is removed. | |||
else: | |||
n_er += 1 | |||
n_ei = nx.number_of_edges(g2) - nb_edges2_cnted | |||
return n_vi, n_vr, n_vs, n_ei, n_er, n_es | |||
def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map): | |||
"""Compute the number of each edit operations. | |||
""" | |||
n_vi = 0 | |||
n_vr = 0 | |||
n_vs = 0 | |||
sod_vs = 0 | |||
n_ei = 0 | |||
n_er = 0 | |||
nodes1 = [n for n in g1.nodes()] | |||
for i, map_i in enumerate(forward_map): | |||
if map_i == np.inf: | |||
n_vr += 1 | |||
else: | |||
n_vs += 1 | |||
diff_x = float(g1.nodes[i]['x']) - float(g2.nodes[map_i]['x']) | |||
diff_y = float(g1.nodes[i]['y']) - float(g2.nodes[map_i]['y']) | |||
sod_vs += np.sqrt(np.square(diff_x) + np.square(diff_y)) | |||
for map_i in backward_map: | |||
if map_i == np.inf: | |||
n_vi += 1 | |||
# idx_nodes1 = range(0, len(node1)) | |||
edges1 = [e for e in g1.edges()] | |||
nb_edges2_cnted = 0 | |||
for n1, n2 in edges1: | |||
idx1 = nodes1.index(n1) | |||
idx2 = nodes1.index(n2) | |||
# one of the nodes is removed, thus the edge is removed. | |||
if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf: | |||
n_er += 1 | |||
# corresponding edge is in g2. Edge label is not considered. | |||
elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \ | |||
(forward_map[idx2], forward_map[idx1]) in g2.edges(): | |||
@@ -329,4 +396,8 @@ def get_nb_edit_operations(g1, g2, forward_map, backward_map): | |||
n_er += 1 | |||
n_ei = nx.number_of_edges(g2) - nb_edges2_cnted | |||
return n_vi, n_vr, n_vs, n_ei, n_er, n_es | |||
return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er | |||
# Running this module directly does no work; it only prints a pointer to
# test_ged.py.
if __name__ == '__main__': | |||
print('check test_ged.py') |
@@ -436,7 +436,8 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
return G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median | |||
def iam_bash(Gn_names, edit_cost_constant, dataset='monoterpenoides', | |||
def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', | |||
dataset='monoterpenoides', | |||
graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/py-graph/datasets/monoterpenoides/'): | |||
"""Compute the iam by c++ implementation (gedlib) through bash. | |||
""" | |||
@@ -467,12 +468,12 @@ def iam_bash(Gn_names, edit_cost_constant, dataset='monoterpenoides', | |||
# graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl' | |||
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n' | |||
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n' | |||
command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' | |||
command += 'export LD_LIBRARY_PATH\n' | |||
command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n' | |||
command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \ | |||
+ ' \'' + graph_dir + '\' ' | |||
+ ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' | |||
if edit_cost_constant is None: | |||
command += 'None' | |||
else: | |||
@@ -484,7 +485,7 @@ def iam_bash(Gn_names, edit_cost_constant, dataset='monoterpenoides', | |||
output = stream.readlines() | |||
# print(output) | |||
sod_sm = float(output[0].strip()) | |||
sod_gm= float(output[1].strip()) | |||
sod_gm = float(output[1].strip()) | |||
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
@@ -31,8 +31,9 @@ from fitDistance import fit_GED_to_kernel_distance | |||
def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_method, | |||
graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/py-graph/datasets/monoterpenoides/', | |||
edit_costs=None, group_min=None, dataset='monoterpenoides', | |||
parallel=True): | |||
cost='CONSTANT', parallel=True): | |||
dataset = dataset.lower() | |||
# # compute distances in kernel space. | |||
# dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, | |||
# Kmatrix=None, gkernel=gkernel) | |||
@@ -50,32 +51,53 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho | |||
# group_min = (12, 13, 22, 29) # closest w.r.t path kernel | |||
# group_min = (77, 85, 160, 171) # closest w.r.t ged | |||
# group_min = (0,1,2,3,4,5,6,7,8,9,10,11) # closest w.r.t treelet kernel | |||
Gn_median = [Gn[g].copy() for g in group_min] | |||
# fit edit costs. | |||
if fit_method == 'random': # random | |||
edit_cost_constant = random.sample(range(1, 10), 6) | |||
if cost == 'LETTER': | |||
edit_cost_constant = random.sample(range(1, 10), 3) | |||
edit_cost_constant = [item * 0.1 for item in edit_cost_constant] | |||
elif cost == 'LETTER2': | |||
random.seed(time.time()) | |||
edit_cost_constant = random.sample(range(1, 10), 5) | |||
# edit_cost_constant = [item * 0.1 for item in edit_cost_constant] | |||
else: | |||
edit_cost_constant = random.sample(range(1, 10), 6) | |||
print('edit costs used:', edit_cost_constant) | |||
elif fit_method == 'expert': # expert | |||
edit_cost_constant = [3, 3, 1, 3, 3, 1] | |||
elif fit_method == 'k-graphs': | |||
itr_max = 6 | |||
algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1' | |||
params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP', | |||
'algo_options': algo_options, 'stabilizer': None} | |||
if cost == 'LETTER': | |||
init_costs = [0.9, 1.7, 0.75] | |||
elif cost == 'LETTER2': | |||
init_costs = [0.675, 0.675, 0.75, 0.425, 0.425] | |||
else: | |||
init_costs = [3, 3, 1, 3, 3, 1] | |||
algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1' | |||
params_ged = {'lib': 'gedlibpy', 'cost': cost, 'method': 'IPFP', | |||
'algo_options': algo_options, 'stabilizer': None} | |||
# fit on k-graph subset | |||
edit_cost_constant, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn_median, | |||
node_label, edge_label, gkernel, itr_max, params_ged=params_ged, parallel=True) | |||
node_label, edge_label, gkernel, itr_max, params_ged=params_ged, | |||
init_costs=init_costs, dataset=dataset, parallel=True) | |||
elif fit_method == 'whole-dataset': | |||
itr_max = 6 | |||
algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1' | |||
params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP', | |||
if cost == 'LETTER': | |||
init_costs = [0.9, 1.7, 0.75] | |||
elif cost == 'LETTER2': | |||
init_costs = [0.675, 0.675, 0.75, 0.425, 0.425] | |||
else: | |||
init_costs = [3, 3, 1, 3, 3, 1] | |||
algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1' | |||
params_ged = {'lib': 'gedlibpy', 'cost': cost, 'method': 'IPFP', | |||
'algo_options': algo_options, 'stabilizer': None} | |||
# fit on all subset | |||
edit_cost_constant, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn, | |||
node_label, edge_label, gkernel, itr_max, params_ged=params_ged, parallel=True) | |||
node_label, edge_label, gkernel, itr_max, params_ged=params_ged, | |||
init_costs=init_costs, dataset=dataset, parallel=True) | |||
elif fit_method == 'precomputed': | |||
edit_cost_constant = edit_costs | |||
@@ -83,14 +105,17 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho | |||
# compute set median and gen median using IAM (C++ through bash). | |||
group_fnames = [Gn[g].graph['filename'] for g in group_min] | |||
sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constant, | |||
graph_dir=graph_dir, dataset=dataset) | |||
cost=cost, graph_dir=graph_dir, | |||
dataset=dataset) | |||
# compute distances in kernel space. | |||
Gn_median = [Gn[g].copy() for g in group_min] | |||
set_median = loadGXL(fname_sm) | |||
gen_median = loadGXL(fname_gm) | |||
if dataset == 'Letter': | |||
# print(gen_median.nodes(data=True)) | |||
# print(gen_median.edges(data=True)) | |||
if dataset == 'letter': | |||
for g in Gn_median: | |||
reform_attributes(g) | |||
reform_attributes(set_median) | |||
@@ -98,16 +123,19 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho | |||
# compute distance in kernel space for set median. | |||
Kmatrix_sm = compute_kernel([set_median] + Gn_median, gkernel, | |||
None if dataset == 'Letter' else 'chem', | |||
None if dataset == 'Letter' else 'valence', | |||
None if dataset == 'letter' else 'chem', | |||
None if dataset == 'letter' else 'valence', | |||
False) | |||
dis_k_sm = dis_gstar(0, range(1, 1+len(Gn_median)), | |||
[1 / len(Gn_median)] * len(Gn_median), Kmatrix_sm, withterm3=False) | |||
# print(gen_median.nodes(data=True)) | |||
# print(gen_median.edges(data=True)) | |||
# print(set_median.nodes(data=True)) | |||
# print(set_median.edges(data=True)) | |||
# compute distance in kernel space for generalized median. | |||
Kmatrix_gm = compute_kernel([gen_median] + Gn_median, gkernel, | |||
None if dataset == 'Letter' else 'chem', | |||
None if dataset == 'Letter' else 'valence', | |||
None if dataset == 'letter' else 'chem', | |||
None if dataset == 'letter' else 'valence', | |||
False) | |||
dis_k_gm = dis_gstar(0, range(1, 1+len(Gn_median)), | |||
[1 / len(Gn_median)] * len(Gn_median), Kmatrix_gm, withterm3=False) | |||
@@ -61,8 +61,8 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose): | |||
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'treeletkernel': | |||
# pkernel = functools.partial(polynomialkernel, d=2, c=1e5) | |||
pkernel = functools.partial(gaussiankernel, gamma=1e-6) | |||
pkernel = functools.partial(polynomialkernel, d=2, c=1e5) | |||
# pkernel = functools.partial(gaussiankernel, gamma=1e-6) | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label, | |||
sub_kernel=pkernel, | |||
@@ -19,11 +19,13 @@ from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_at | |||
from preimage.utils import get_same_item_indices | |||
from preimage.find_best_k import getRelations | |||
def xp_letter_h(): | |||
ds = {'name': 'Letter-high', | |||
'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
def xp_letter_h_LETTER2_cost(): | |||
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml', | |||
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir']) | |||
for G in Gn: | |||
reform_attributes(G) | |||
# ds = {'name': 'Letter-high', | |||
# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb | |||
# Gn, y_all = loadDataset(ds['dataset']) | |||
@@ -33,32 +35,35 @@ def xp_letter_h(): | |||
edge_label = None | |||
ds_name = 'letter-h' | |||
dir_output = 'results/xp_letter_h/' | |||
save_results = True | |||
cost = 'LETTER2' | |||
repeats = 1 | |||
# k_list = range(2, 11) | |||
k_list = [150] | |||
fit_method = 'precomputed' | |||
fit_method = 'k-graphs' | |||
# get indices by classes. | |||
y_idx = get_same_item_indices(y_all) | |||
# create result files. | |||
fn_output_detail = 'results_detail.' + fit_method + '.csv' | |||
f_detail = open(dir_output + fn_output_detail, 'a') | |||
csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k', | |||
'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM', | |||
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | |||
'dis_k gi -> GM', 'median set']) | |||
f_detail.close() | |||
fn_output_summary = 'results_summary.csv' | |||
f_summary = open(dir_output + fn_output_summary, 'a') | |||
csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k', | |||
'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM', | |||
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | |||
'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM', | |||
'# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM', | |||
'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', | |||
'repeats better dis_k gi -> GM']) | |||
f_summary.close() | |||
if save_results: | |||
# create result files. | |||
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | |||
f_detail = open(dir_output + fn_output_detail, 'a') | |||
csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k', | |||
'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM', | |||
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | |||
'dis_k gi -> GM', 'median set']) | |||
f_detail.close() | |||
fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | |||
f_summary = open(dir_output + fn_output_summary, 'a') | |||
csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k', | |||
'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM', | |||
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', | |||
'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM', | |||
'# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM', | |||
'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', | |||
'repeats better dis_k gi -> GM']) | |||
f_summary.close() | |||
random.seed(1) | |||
rdn_seed_list = random.sample(range(0, repeats * 100), repeats) | |||
@@ -82,11 +87,11 @@ def xp_letter_h(): | |||
for i, (y, values) in enumerate(y_idx.items()): | |||
print('\ny =', y) | |||
# y = 'I' | |||
# y = 'F' | |||
# values = y_idx[y] | |||
# values = values[0:10] | |||
# k = len(values) | |||
# k = kkk | |||
k = len(values) | |||
sod_sm_list = [] | |||
sod_gm_list = [] | |||
@@ -114,20 +119,21 @@ def xp_letter_h(): | |||
= median_on_k_closest_graphs(Gn_median, node_label, edge_label, | |||
gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'], | |||
edit_costs=None, group_min=median_set_idx_idx, | |||
dataset='Letter', parallel=False) | |||
dataset='Letter', cost=cost, parallel=False) | |||
# write result detail. | |||
sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm)) | |||
dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm)) | |||
dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min)) | |||
dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min)) | |||
f_detail = open(dir_output + fn_output_detail, 'a') | |||
csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k, | |||
y, repeat, | |||
sod_sm, sod_gm, dis_k_sm, dis_k_gm, | |||
dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm, | |||
dis_k_gi2gm, median_set_idx]) | |||
f_detail.close() | |||
if save_results: | |||
f_detail = open(dir_output + fn_output_detail, 'a') | |||
csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k, | |||
y, repeat, | |||
sod_sm, sod_gm, dis_k_sm, dis_k_gm, | |||
dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm, | |||
dis_k_gi2gm, median_set_idx]) | |||
f_detail.close() | |||
# compute result summary. | |||
sod_sm_list.append(sod_sm) | |||
@@ -170,14 +176,17 @@ def xp_letter_h(): | |||
# save median graphs. | |||
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl' | |||
fn_pre_sm_new = dir_output + 'medians/set_median.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
copyfile(fname_sm, fn_pre_sm_new + '.gxl') | |||
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl' | |||
fn_pre_gm_new = dir_output + 'medians/gen_median.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
copyfile(fname_gm, fn_pre_gm_new + '.gxl') | |||
G_best_kernel = Gn_median[idx_dis_k_gi_min].copy() | |||
reform_attributes(G_best_kernel) | |||
fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \ | |||
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat) | |||
saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter') | |||
# plot median graphs. | |||
@@ -197,16 +206,17 @@ def xp_letter_h(): | |||
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1])) | |||
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1])) | |||
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1])) | |||
f_summary = open(dir_output + fn_output_summary, 'a') | |||
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y, | |||
sod_sm_mean_list[-1], sod_gm_mean_list[-1], | |||
dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1], | |||
dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean, | |||
dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm, | |||
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm, | |||
repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm, | |||
repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm]) | |||
f_summary.close() | |||
if save_results: | |||
f_summary = open(dir_output + fn_output_summary, 'a') | |||
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y, | |||
sod_sm_mean_list[-1], sod_gm_mean_list[-1], | |||
dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1], | |||
dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean, | |||
dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm, | |||
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm, | |||
repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm, | |||
repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm]) | |||
f_summary.close() | |||
# write result summary for each letter. | |||
@@ -219,13 +229,232 @@ def xp_letter_h(): | |||
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean)) | |||
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean)) | |||
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean)) | |||
if save_results: | |||
f_summary = open(dir_output + fn_output_summary, 'a') | |||
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all', | |||
sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean, | |||
dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean, | |||
dis_k_gi2sm_mean, dis_k_gi2gm_mean]) | |||
f_summary.close() | |||
print('\ncomplete.') | |||
def _append_csv_row(path, row):
    """Append one row to the CSV file at ``path`` and close the file afterwards."""
    # newline='' is what the csv module asks for on files handed to csv.writer.
    with open(path, 'a', newline='') as f:
        csv.writer(f).writerow(row)


def _tally_relation(baseline, candidate, counts, better_repeats, repeat):
    """Record in place how ``candidate`` compares with ``baseline``.

    ``counts`` is a 3-item list: index 0 is incremented when ``candidate`` is
    strictly smaller than ``baseline`` (and ``repeat`` is then appended to
    ``better_repeats``), index 1 when both are equal, index 2 when ``candidate``
    is strictly larger. Nothing is recorded if no comparison holds (e.g. NaN).
    """
    if baseline > candidate:
        counts[0] += 1
        better_repeats.append(repeat)
    elif baseline == candidate:
        counts[1] += 1
    elif baseline < candidate:
        counts[2] += 1


def xp_letter_h():
    """Run the median experiment on the Letter (HIGH) graphs, class by class.

    For every class returned by ``get_same_item_indices`` and every repeat,
    ``median_on_k_closest_graphs`` is called on all graphs of that class; the
    returned SOD / kernel-distance values are compared pairwise, the median
    graph files are copied into ``dir_output + 'medians/'`` and drawn, and,
    when ``save_results`` is True, detail and summary rows are appended to CSV
    files in ``dir_output``.

    NOTE(review): "SM"/"GM" presumably mean set median / generalized median
    (cf. the ``set_median.gxl`` / ``gen_median.gxl`` file names) -- confirm.
    The dataset and temporary GED output locations are hard-coded absolute
    paths. The function takes no arguments and returns nothing.
    """
    ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
          'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
    for G in Gn:
        reform_attributes(G)

    gkernel = 'structuralspkernel'
    node_label = None
    edge_label = None
    ds_name = 'letter-h'
    dir_output = 'results/xp_letter_h/'
    save_results = False

    repeats = 1
    k_list = [150]
    fit_method = 'k-graphs'
    # get indices by classes.
    y_idx = get_same_item_indices(y_all)

    # result file names; bound unconditionally so every later use is safe.
    fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
    fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
    if save_results:
        # create result files (header rows are appended on every run).
        _append_csv_row(dir_output + fn_output_detail,
                        ['dataset', 'graph kernel', 'fit method', 'k',
                         'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
                         'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
                         'dis_k gi -> GM', 'median set'])
        _append_csv_row(dir_output + fn_output_summary,
                        ['dataset', 'graph kernel', 'fit method', 'k',
                         'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
                         'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
                         'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
                         '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
                         'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
                         'repeats better dis_k gi -> GM'])

    # one fixed seed per repeat, so the sampling is reproducible across runs.
    random.seed(1)
    rdn_seed_list = random.sample(range(0, repeats * 100), repeats)

    # directory the median .gxl files are copied from after each call.
    tmp_ged_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/'

    for k in k_list:
        print('\n--------- k =', k, '----------')

        # per-class means, one entry per class.
        sod_sm_mean_list = []
        sod_gm_mean_list = []
        dis_k_sm_mean_list = []
        dis_k_gm_mean_list = []
        dis_k_gi_min_mean_list = []

        for y, values in y_idx.items():
            print('\ny =', y)

            # deliberately overrides the k taken from k_list: every graph of
            # the class is used, so k_list only affects the banner above.
            k = len(values)

            # per-repeat results for this class.
            sod_sm_list = []
            sod_gm_list = []
            dis_k_sm_list = []
            dis_k_gm_list = []
            dis_k_gi_min_list = []
            # [first is worse, equal, first is better] counters per comparison.
            nb_sod_sm2gm = [0, 0, 0]
            nb_dis_k_sm2gm = [0, 0, 0]
            nb_dis_k_gi2sm = [0, 0, 0]
            nb_dis_k_gi2gm = [0, 0, 0]
            repeats_better_sod_sm2gm = []
            repeats_better_dis_k_sm2gm = []
            repeats_better_dis_k_gi2sm = []
            repeats_better_dis_k_gi2gm = []

            for repeat in range(repeats):
                print('\nrepeat =', repeat)
                random.seed(rdn_seed_list[repeat])
                # with k == len(values) this is a shuffled list of all positions.
                median_set_idx_idx = random.sample(range(0, len(values)), k)
                median_set_idx = [values[idx] for idx in median_set_idx_idx]
                print('median set: ', median_set_idx)
                Gn_median = [Gn[g] for g in values]

                sod_sm, sod_gm, dis_k_sm, dis_k_gm, _, dis_k_gi_min, idx_dis_k_gi_min \
                    = median_on_k_closest_graphs(Gn_median, node_label, edge_label,
                        gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
                        edit_costs=None, group_min=median_set_idx_idx,
                        dataset='Letter', parallel=False)

                # write result detail.
                sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
                dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
                dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
                dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
                if save_results:
                    _append_csv_row(dir_output + fn_output_detail,
                                    [ds_name, gkernel, fit_method, k,
                                     y, repeat,
                                     sod_sm, sod_gm, dis_k_sm, dis_k_gm,
                                     dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
                                     dis_k_gi2gm, median_set_idx])

                # compute result summary.
                sod_sm_list.append(sod_sm)
                sod_gm_list.append(sod_gm)
                dis_k_sm_list.append(dis_k_sm)
                dis_k_gm_list.append(dis_k_gm)
                dis_k_gi_min_list.append(dis_k_gi_min)
                # # SOD SM -> GM
                _tally_relation(sod_sm, sod_gm, nb_sod_sm2gm,
                                repeats_better_sod_sm2gm, repeat)
                # # dis_k SM -> GM
                _tally_relation(dis_k_sm, dis_k_gm, nb_dis_k_sm2gm,
                                repeats_better_dis_k_sm2gm, repeat)
                # # dis_k gi -> SM
                _tally_relation(dis_k_gi_min, dis_k_sm, nb_dis_k_gi2sm,
                                repeats_better_dis_k_gi2sm, repeat)
                # # dis_k gi -> GM
                _tally_relation(dis_k_gi_min, dis_k_gm, nb_dis_k_gi2gm,
                                repeats_better_dis_k_gi2gm, repeat)

                # save median graphs (done regardless of save_results).
                fn_suffix = '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
                fname_sm = tmp_ged_dir + 'set_median.gxl'
                fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method + fn_suffix
                copyfile(fname_sm, fn_pre_sm_new + '.gxl')
                fname_gm = tmp_ged_dir + 'gen_median.gxl'
                fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method + fn_suffix
                copyfile(fname_gm, fn_pre_gm_new + '.gxl')
                G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
                reform_attributes(G_best_kernel)
                fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method + fn_suffix
                saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')

                # plot median graphs.
                set_median = loadGXL(fn_pre_sm_new + '.gxl')
                gen_median = loadGXL(fn_pre_gm_new + '.gxl')
                draw_Letter_graph(set_median, fn_pre_sm_new)
                draw_Letter_graph(gen_median, fn_pre_gm_new)
                draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)

            # write result summary for each letter.
            sod_sm_mean_list.append(np.mean(sod_sm_list))
            sod_gm_mean_list.append(np.mean(sod_gm_list))
            dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
            dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
            dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
            sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
            dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
            dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
            dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
            if save_results:
                _append_csv_row(dir_output + fn_output_summary,
                                [ds_name, gkernel, fit_method, k, y,
                                 sod_sm_mean_list[-1], sod_gm_mean_list[-1],
                                 dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
                                 dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
                                 dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
                                 nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
                                 repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
                                 repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])

        # write result summary over all letters.
        sod_sm_mean = np.mean(sod_sm_mean_list)
        sod_gm_mean = np.mean(sod_gm_mean_list)
        dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
        dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
        # average the per-class means, like the four statistics above (the
        # per-repeat list would only cover the last class processed).
        dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)
        sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
        dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
        dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
        dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
        if save_results:
            # k here is the size of the last class processed (see the
            # reassignment inside the class loop).
            _append_csv_row(dir_output + fn_output_summary,
                            [ds_name, gkernel, fit_method, k, 'all',
                             sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
                             dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
                             dis_k_gi2sm_mean, dis_k_gi2gm_mean])

    print('\ncomplete.')
@@ -243,4 +472,5 @@ def draw_Letter_graph(graph, file_prefix): | |||
if __name__ == "__main__":
    # script entry point: only the LETTER2-cost experiment is run; the plain
    # Letter-high experiment is kept below for reference but disabled.
    # xp_letter_h()
    xp_letter_h_LETTER2_cost()