Browse Source

update pre-image method for Letter-h dataset

v0.1
jajupmochi 5 years ago
parent
commit
5b91920275
6 changed files with 577 additions and 119 deletions
  1. +158
    -30
      preimage/fitDistance.py
  2. +85
    -14
      preimage/ged.py
  3. +5
    -4
      preimage/iam.py
  4. +46
    -18
      preimage/test_k_closest_graphs.py
  5. +2
    -2
      preimage/utils.py
  6. +281
    -51
      preimage/xp_letter_h.py

+ 158
- 30
preimage/fitDistance.py View File

@@ -15,24 +15,28 @@ import time
import random

from scipy import optimize
from scipy.optimize import minimize
import cvxpy as cp

import sys
#sys.path.insert(0, "../")
from ged import GED, get_nb_edit_operations
from utils import kernel_distance_matrix
sys.path.insert(0, "../")
from preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter
from preimage.utils import kernel_distance_matrix

def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4,
def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max,
params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT',
'method': 'IPFP', 'stabilizer': None},
init_costs=[3, 3, 1, 3, 3, 1],
dataset='monoterpenoides',
parallel=True):
dataset = dataset.lower()
# c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
# random.seed(1)
# cost_rdm = random.sample(range(1, 10), 6)
# init_costs = cost_rdm + [0]
# init_costs = cost_rdm
init_costs = [3, 3, 1, 3, 3, 1]
# init_costs = [3, 3, 1, 3, 3, 1]
# init_costs = [i * 0.01 for i in cost_rdm] + [0]
# init_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
# init_costs = [0, 0, 0.9544, 0.026, 0.0196, 0]
@@ -51,8 +55,10 @@ def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4
# init ged.
print('\ninitial:')
time0 = time.time()
params_ged['dataset'] = dataset
params_ged['edit_cost_constant'] = init_costs
ged_vec_init, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
dataset,
parallel=parallel)
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
time_list = [time.time() - time0]
@@ -67,20 +73,21 @@ def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4
time0 = time.time()
# "fit" geds to distances in feature space by tuning edit costs using the
# Least Squares Method.
edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec)
edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec,
dataset=dataset, cost=params_ged['cost'])
for i in range(len(edit_costs_new)):
if -1e-9 <= edit_costs_new[i] <= 1e-9:
edit_costs_new[i] = 0
if edit_costs_new[i] < 0:
if edit_costs_new[i] > -1e-9:
edit_costs_new[i] = 0
else:
raise ValueError('The edit cost is negative.')
raise ValueError('The edit cost is negative.')
# for i in range(len(edit_costs_new)):
# if edit_costs_new[i] < 0:
# edit_costs_new[i] = 0

# compute new GEDs and numbers of edit operations.
params_ged['edit_cost_constant'] = edit_costs_new
ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
params_ged['edit_cost_constant'] = edit_costs_new # np.array([edit_costs_new[0], edit_costs_new[1], 0.75])
ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
dataset,
parallel=parallel)
residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
time_list.append(time.time() - time0)
@@ -94,7 +101,8 @@ def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4
time_list, nb_cost_mat_list


def compute_geds(Gn, params_ged, parallel=False):
def compute_geds(Gn, params_ged, dataset, parallel=False):
get_nb_eo = get_nb_edit_operations_letter if dataset == 'letter' else get_nb_edit_operations
ged_mat = np.zeros((len(Gn), len(Gn)))
if parallel:
# print('parallel')
@@ -112,7 +120,7 @@ def compute_geds(Gn, params_ged, parallel=False):
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(_wrapper_compute_ged_parallel, params_ged)
do_partial = partial(_wrapper_compute_ged_parallel, params_ged, get_nb_eo)
pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,))
iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
desc='computing GEDs', file=sys.stdout)
@@ -138,26 +146,146 @@ def compute_geds(Gn, params_ged, parallel=False):
ged_vec.append(dis)
ged_mat[i][j] = dis
ged_mat[j][i] = dis
n_eo_tmp = get_nb_edit_operations(Gn[i], Gn[j], pi_forward, pi_backward)
n_eo_tmp = get_nb_eo(Gn[i], Gn[j], pi_forward, pi_backward)
n_edit_operations.append(n_eo_tmp)
return ged_vec, ged_mat, n_edit_operations

def _wrapper_compute_ged_parallel(params_ged, get_nb_eo, itr):
    """Compute the GED of one pair of graphs inside a pool worker.

    Parameters
    ----------
    params_ged : dict
        Keyword arguments forwarded to ``GED`` by ``_compute_ged_parallel``.
    get_nb_eo : callable
        Function that counts the edit operations of a node mapping; it is
        called as ``get_nb_eo(g1, g2, pi_forward, pi_backward)``.
    itr : sequence
        Pair ``(i, j)`` of indices into the worker-global graph list.

    Returns
    -------
    tuple
        ``(i, j, dis, n_eo_tmp)``: the two indices, the value ``dis``
        returned by ``GED`` and the result of ``get_nb_eo`` for that pair.
    """
    i, j = itr[0], itr[1]
    # G_gn is the module-level global set by the pool initializer
    # (``init_worker``), so the graph list is not pickled for every task.
    dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged,
                                          get_nb_eo)
    return i, j, dis, n_eo_tmp


def _compute_ged_parallel(g1, g2, params_ged, get_nb_eo):
    """Compute the GED between two graphs and count its edit operations.

    Parameters
    ----------
    g1, g2 : graph objects
        The two graphs to compare; passed unchanged to ``GED`` and
        ``get_nb_eo``.
    params_ged : dict
        Keyword arguments unpacked into the ``GED`` call.
    get_nb_eo : callable
        Function that counts edit operations, called as
        ``get_nb_eo(g1, g2, pi_forward, pi_backward)``.

    Returns
    -------
    tuple
        ``(dis, n_eo_tmp)``: the first value returned by ``GED`` and the
        result of ``get_nb_eo`` for the forward/backward maps it returned.
    """
    dis, pi_forward, pi_backward = GED(g1, g2, **params_ged)
    # The counting function is injected by the caller so that dataset-specific
    # counters can be used without changing this helper.
    n_eo_tmp = get_nb_eo(g1, g2, pi_forward, pi_backward)
    return dis, n_eo_tmp


def update_costs(nb_cost_mat, dis_k_vec):
def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides',
cost='CONSTANT', rw_constraints='2constraints'):
if dataset.lower() == 'letter':
if cost == 'LETTER':
pass
# # method 1: set alpha automatically, just tune c_vir and c_eir by
# # LMS using cvxpy.
# alpha = 0.5
# coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec)
## if np.count_nonzero(nb_cost_mat[:,4]) == 0:
## alpha = 0.75
## else:
## alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0])
## alpha = alpha * 0.99
# param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1])
# param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5])
# nb_cost_mat_new = np.column_stack((param_vir, param_eir))
# dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3]
#
# x = cp.Variable(nb_cost_mat_new.shape[1])
# cost = cp.sum_squares(nb_cost_mat_new * x - dis_new)
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
# prob = cp.Problem(cp.Minimize(cost), constraints)
# prob.solve()
# edit_costs_new = x.value
# edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha])
# residual = np.sqrt(prob.value)
# # method 2: tune c_vir, c_eir and alpha by nonlinear programming by
# # scipy.optimize.minimize.
# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
# w2 = nb_cost_mat[:,3]
# w3 = dis_k_vec
# func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
# + w2 * x[2] - w3 * x[3]) ** 2)
# bounds = ((0, None), (0., None), (0.5, 0.5), (0, None))
# res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds)
# edit_costs_new = res.x[0:3]
# residual = res.fun
# method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy.
# # method 4: tune c_vir, c_eir and alpha by QP function
# # scipy.optimize.least_squares. An initial guess is required.
# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
# w2 = nb_cost_mat[:,3]
# w3 = dis_k_vec
# func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
# + w2 * x[2] - w3 * x[3]) ** 2
# res = optimize.root(func, [0.9, 1.7, 0.75, 100])
# edit_costs_new = res.x
# residual = None
elif cost == 'LETTER2':
# # 1. if c_vi != c_vr, c_ei != c_er.
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
# x = cp.Variable(nb_cost_mat_new.shape[1])
# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
## # 1.1 no constraints.
## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
# # 1.2 c_vs <= c_vi + c_vr.
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
## # 2. if c_vi == c_vr, c_ei == c_er.
## nb_cost_mat_new = nb_cost_mat[:,[0,3,4]]
## nb_cost_mat_new[:,0] += nb_cost_mat[:,1]
## nb_cost_mat_new[:,2] += nb_cost_mat[:,5]
## x = cp.Variable(nb_cost_mat_new.shape[1])
## cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
## # 2.1 no constraints.
## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
### # 2.2 c_vs <= c_vi + c_vr.
### constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
### np.array([2.0, -1.0, 0.0]).T@x >= 0.0]
#
# prob = cp.Problem(cp.Minimize(cost_fun), constraints)
# prob.solve()
# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
# edit_costs_new = np.array(edit_costs_new)
# residual = np.sqrt(prob.value)
if rw_constraints == 'inequality':
# c_vs <= c_vi + c_vr.
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif rw_constraints == '2constraints':
# c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er.
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0,
np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
edit_costs_new = x.value
residual = np.sqrt(prob.value)
# elif method == 'inequality_modified':
# # c_vs <= c_vi + c_vr.
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
# x = cp.Variable(nb_cost_mat_new.shape[1])
# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
# prob = cp.Problem(cp.Minimize(cost_fun), constraints)
# prob.solve()
# # use same costs for insertion and removal rather than the fitted costs.
# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
# edit_costs_new = np.array(edit_costs_new)
# residual = np.sqrt(prob.value)
else:
# # method 1: simple least square method.
# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
# rcond=None)
@@ -181,16 +309,16 @@ def update_costs(nb_cost_mat, dis_k_vec):
# G = -1 * np.identity(nb_cost_mat.shape[1])
# h = np.array([0 for i in range(nb_cost_mat.shape[1])])
x = cp.Variable(nb_cost_mat.shape[1])
cost = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
constraints = [x >= [0.0001 for i in range(nb_cost_mat.shape[1])],
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost), constraints)
prob.solve()
edit_costs_new = x.value
residual = np.sqrt(prob.value)
x = cp.Variable(nb_cost_mat.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
edit_costs_new = x.value
residual = np.sqrt(prob.value)
# method 4:


+ 85
- 14
preimage/ged.py View File

@@ -13,33 +13,46 @@ import multiprocessing
from multiprocessing import Pool
from functools import partial

from gedlibpy_linlin import librariesImport, gedlibpy
#from gedlibpy_linlin import librariesImport, gedlibpy
from libs import *

def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP',
def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method='IPFP',
edit_cost_constant=[], algo_options='', stabilizer='min', repeat=50):
"""
Compute GED for 2 graphs.
"""
def convertGraph(G, dataset):
    """Convert a graph to the NetworkX format that can be recognized by
    library gedlibpy.

    Parameters
    ----------
    G : NetworkX graph
        Input graph. For ``dataset == 'letter'`` every node must carry an
        ``'attributes'`` entry with at least two items; otherwise every
        node must carry ``'atom'`` and every edge ``'bond_type'``.
    dataset : str
        Dataset name. ``'letter'`` selects the coordinate-based
        conversion; any other value (including ``'monoterpenoides'``)
        selects the symbolic-label conversion.

    Returns
    -------
    nx.Graph
        New graph whose node identifiers are the ``str()`` of the
        originals.
    """
    G_new = nx.Graph()
    if dataset == 'letter':
        # Node positions are stored as string attributes ``x`` and ``y``;
        # edges carry no attribute for this dataset.
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), x=str(attrs['attributes'][0]),
                           y=str(attrs['attributes'][1]))
        for nd1, nd2, _ in G.edges(data=True):
            G_new.add_edge(str(nd1), str(nd2))
    else:
        # 'monoterpenoides' and every other dataset share the same
        # conversion: node label -> ``chem``, edge label -> ``valence``.
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), chem=attrs['atom'])
        for nd1, nd2, attrs in G.edges(data=True):
            G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
    return G_new
dataset = dataset.lower()
if lib == 'gedlibpy':
gedlibpy.restart_env()
gedlibpy.add_nx_graph(convertGraph(g1), "")
gedlibpy.add_nx_graph(convertGraph(g2), "")
gedlibpy.add_nx_graph(convertGraph(g1, dataset), "")
gedlibpy.add_nx_graph(convertGraph(g2, dataset), "")

listID = gedlibpy.get_all_graph_ids()
gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
@@ -320,6 +333,60 @@ def get_nb_edit_operations(g1, g2, forward_map, backward_map):
# one of the nodes is removed, thus the edge is removed.
if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
n_er += 1
# corresponding edge is in g2.
elif (forward_map[idx1], forward_map[idx2]) in g2.edges():
nb_edges2_cnted += 1
# edge labels are different.
if g2.edges[((forward_map[idx1], forward_map[idx2]))]['bond_type'] \
!= g1.edges[(n1, n2)]['bond_type']:
n_es += 1
elif (forward_map[idx2], forward_map[idx1]) in g2.edges():
nb_edges2_cnted += 1
# edge labels are different.
if g2.edges[((forward_map[idx2], forward_map[idx1]))]['bond_type'] \
!= g1.edges[(n1, n2)]['bond_type']:
n_es += 1
# corresponding nodes are in g2, however the edge is removed.
else:
n_er += 1
n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
return n_vi, n_vr, n_vs, n_ei, n_er, n_es


def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
"""Compute the number of each edit operations.
"""
n_vi = 0
n_vr = 0
n_vs = 0
sod_vs = 0
n_ei = 0
n_er = 0
nodes1 = [n for n in g1.nodes()]
for i, map_i in enumerate(forward_map):
if map_i == np.inf:
n_vr += 1
else:
n_vs += 1
diff_x = float(g1.nodes[i]['x']) - float(g2.nodes[map_i]['x'])
diff_y = float(g1.nodes[i]['y']) - float(g2.nodes[map_i]['y'])
sod_vs += np.sqrt(np.square(diff_x) + np.square(diff_y))
for map_i in backward_map:
if map_i == np.inf:
n_vi += 1
# idx_nodes1 = range(0, len(node1))
edges1 = [e for e in g1.edges()]
nb_edges2_cnted = 0
for n1, n2 in edges1:
idx1 = nodes1.index(n1)
idx2 = nodes1.index(n2)
# one of the nodes is removed, thus the edge is removed.
if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
n_er += 1
# corresponding edge is in g2. Edge label is not considered.
elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \
(forward_map[idx2], forward_map[idx1]) in g2.edges():
@@ -329,4 +396,8 @@ def get_nb_edit_operations(g1, g2, forward_map, backward_map):
n_er += 1
n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
return n_vi, n_vr, n_vs, n_ei, n_er, n_es
return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er


if __name__ == '__main__':
print('check test_ged.py')

+ 5
- 4
preimage/iam.py View File

@@ -436,7 +436,8 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
return G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median


def iam_bash(Gn_names, edit_cost_constant, dataset='monoterpenoides',
def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT',
dataset='monoterpenoides',
graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/py-graph/datasets/monoterpenoides/'):
"""Compute the iam by c++ implementation (gedlib) through bash.
"""
@@ -467,12 +468,12 @@ def iam_bash(Gn_names, edit_cost_constant, dataset='monoterpenoides',
# graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl'
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n'
command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n'
command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
command += 'export LD_LIBRARY_PATH\n'
command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n'
command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \
+ ' \'' + graph_dir + '\' '
+ ' \'' + graph_dir + '\' ' + ' ' + cost + ' '
if edit_cost_constant is None:
command += 'None'
else:
@@ -484,7 +485,7 @@ def iam_bash(Gn_names, edit_cost_constant, dataset='monoterpenoides',
output = stream.readlines()
# print(output)
sod_sm = float(output[0].strip())
sod_gm= float(output[1].strip())
sod_gm = float(output[1].strip())
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'


+ 46
- 18
preimage/test_k_closest_graphs.py View File

@@ -31,8 +31,9 @@ from fitDistance import fit_GED_to_kernel_distance
def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_method,
graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/py-graph/datasets/monoterpenoides/',
edit_costs=None, group_min=None, dataset='monoterpenoides',
parallel=True):

cost='CONSTANT', parallel=True):
dataset = dataset.lower()
# # compute distances in kernel space.
# dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
# Kmatrix=None, gkernel=gkernel)
@@ -50,32 +51,53 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho
# group_min = (12, 13, 22, 29) # closest w.r.t path kernel
# group_min = (77, 85, 160, 171) # closest w.r.t ged
# group_min = (0,1,2,3,4,5,6,7,8,9,10,11) # closest w.r.t treelet kernel

Gn_median = [Gn[g].copy() for g in group_min]


# fit edit costs.
if fit_method == 'random': # random
edit_cost_constant = random.sample(range(1, 10), 6)
if cost == 'LETTER':
edit_cost_constant = random.sample(range(1, 10), 3)
edit_cost_constant = [item * 0.1 for item in edit_cost_constant]
elif cost == 'LETTER2':
random.seed(time.time())
edit_cost_constant = random.sample(range(1, 10), 5)
# edit_cost_constant = [item * 0.1 for item in edit_cost_constant]
else:
edit_cost_constant = random.sample(range(1, 10), 6)
print('edit costs used:', edit_cost_constant)
elif fit_method == 'expert': # expert
edit_cost_constant = [3, 3, 1, 3, 3, 1]
elif fit_method == 'k-graphs':
itr_max = 6
algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
'algo_options': algo_options, 'stabilizer': None}
if cost == 'LETTER':
init_costs = [0.9, 1.7, 0.75]
elif cost == 'LETTER2':
init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
else:
init_costs = [3, 3, 1, 3, 3, 1]
algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
params_ged = {'lib': 'gedlibpy', 'cost': cost, 'method': 'IPFP',
'algo_options': algo_options, 'stabilizer': None}
# fit on k-graph subset
edit_cost_constant, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn_median,
node_label, edge_label, gkernel, itr_max, params_ged=params_ged, parallel=True)
node_label, edge_label, gkernel, itr_max, params_ged=params_ged,
init_costs=init_costs, dataset=dataset, parallel=True)
elif fit_method == 'whole-dataset':
itr_max = 6
algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
if cost == 'LETTER':
init_costs = [0.9, 1.7, 0.75]
elif cost == 'LETTER2':
init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
else:
init_costs = [3, 3, 1, 3, 3, 1]
algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
params_ged = {'lib': 'gedlibpy', 'cost': cost, 'method': 'IPFP',
'algo_options': algo_options, 'stabilizer': None}
# fit on all subset
edit_cost_constant, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn,
node_label, edge_label, gkernel, itr_max, params_ged=params_ged, parallel=True)
node_label, edge_label, gkernel, itr_max, params_ged=params_ged,
init_costs=init_costs, dataset=dataset, parallel=True)
elif fit_method == 'precomputed':
edit_cost_constant = edit_costs

@@ -83,14 +105,17 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho
# compute set median and gen median using IAM (C++ through bash).
group_fnames = [Gn[g].graph['filename'] for g in group_min]
sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constant,
graph_dir=graph_dir, dataset=dataset)
cost=cost, graph_dir=graph_dir,
dataset=dataset)
# compute distances in kernel space.
Gn_median = [Gn[g].copy() for g in group_min]
set_median = loadGXL(fname_sm)
gen_median = loadGXL(fname_gm)
if dataset == 'Letter':
# print(gen_median.nodes(data=True))
# print(gen_median.edges(data=True))
if dataset == 'letter':
for g in Gn_median:
reform_attributes(g)
reform_attributes(set_median)
@@ -98,16 +123,19 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho
# compute distance in kernel space for set median.
Kmatrix_sm = compute_kernel([set_median] + Gn_median, gkernel,
None if dataset == 'Letter' else 'chem',
None if dataset == 'Letter' else 'valence',
None if dataset == 'letter' else 'chem',
None if dataset == 'letter' else 'valence',
False)
dis_k_sm = dis_gstar(0, range(1, 1+len(Gn_median)),
[1 / len(Gn_median)] * len(Gn_median), Kmatrix_sm, withterm3=False)
# print(gen_median.nodes(data=True))
# print(gen_median.edges(data=True))
# print(set_median.nodes(data=True))
# print(set_median.edges(data=True))
# compute distance in kernel space for generalized median.
Kmatrix_gm = compute_kernel([gen_median] + Gn_median, gkernel,
None if dataset == 'Letter' else 'chem',
None if dataset == 'Letter' else 'valence',
None if dataset == 'letter' else 'chem',
None if dataset == 'letter' else 'valence',
False)
dis_k_gm = dis_gstar(0, range(1, 1+len(Gn_median)),
[1 / len(Gn_median)] * len(Gn_median), Kmatrix_gm, withterm3=False)


+ 2
- 2
preimage/utils.py View File

@@ -61,8 +61,8 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
elif graph_kernel == 'treeletkernel':
# pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
pkernel = functools.partial(gaussiankernel, gamma=1e-6)
pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
# pkernel = functools.partial(gaussiankernel, gamma=1e-6)
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label,
sub_kernel=pkernel,


+ 281
- 51
preimage/xp_letter_h.py View File

@@ -19,11 +19,13 @@ from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_at
from preimage.utils import get_same_item_indices
from preimage.find_best_k import getRelations

def xp_letter_h():
ds = {'name': 'Letter-high',
'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
def xp_letter_h_LETTER2_cost():
ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
for G in Gn:
reform_attributes(G)
# ds = {'name': 'Letter-high',
# 'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
# Gn, y_all = loadDataset(ds['dataset'])
@@ -33,32 +35,35 @@ def xp_letter_h():
edge_label = None
ds_name = 'letter-h'
dir_output = 'results/xp_letter_h/'
save_results = True
cost = 'LETTER2'
repeats = 1
# k_list = range(2, 11)
k_list = [150]
fit_method = 'precomputed'
fit_method = 'k-graphs'
# get indices by classes.
y_idx = get_same_item_indices(y_all)
# create result files.
fn_output_detail = 'results_detail.' + fit_method + '.csv'
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'median set'])
f_detail.close()
fn_output_summary = 'results_summary.csv'
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
'# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
'repeats better dis_k gi -> GM'])
f_summary.close()
if save_results:
# create result files.
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', 'median set'])
f_detail.close()
fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
'# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
'repeats better dis_k gi -> GM'])
f_summary.close()
random.seed(1)
rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
@@ -82,11 +87,11 @@ def xp_letter_h():
for i, (y, values) in enumerate(y_idx.items()):
print('\ny =', y)
# y = 'I'
# y = 'F'
# values = y_idx[y]
# values = values[0:10]
# k = len(values)
# k = kkk
k = len(values)
sod_sm_list = []
sod_gm_list = []
@@ -114,20 +119,21 @@ def xp_letter_h():
= median_on_k_closest_graphs(Gn_median, node_label, edge_label,
gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
edit_costs=None, group_min=median_set_idx_idx,
dataset='Letter', parallel=False)
dataset='Letter', cost=cost, parallel=False)
# write result detail.
sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
y, repeat,
sod_sm, sod_gm, dis_k_sm, dis_k_gm,
dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
dis_k_gi2gm, median_set_idx])
f_detail.close()
if save_results:
f_detail = open(dir_output + fn_output_detail, 'a')
csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
y, repeat,
sod_sm, sod_gm, dis_k_sm, dis_k_gm,
dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
dis_k_gi2gm, median_set_idx])
f_detail.close()
# compute result summary.
sod_sm_list.append(sod_sm)
@@ -170,14 +176,17 @@ def xp_letter_h():
# save median graphs.
fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
fn_pre_sm_new = dir_output + 'medians/set_median.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_sm, fn_pre_sm_new + '.gxl')
fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
fn_pre_gm_new = dir_output + 'medians/gen_median.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
copyfile(fname_gm, fn_pre_gm_new + '.gxl')
G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
reform_attributes(G_best_kernel)
fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
+ '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')
# plot median graphs.
@@ -197,16 +206,17 @@ def xp_letter_h():
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
sod_sm_mean_list[-1], sod_gm_mean_list[-1],
dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
f_summary.close()
if save_results:
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
sod_sm_mean_list[-1], sod_gm_mean_list[-1],
dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
f_summary.close()

# write result summary for each letter.
@@ -219,13 +229,232 @@ def xp_letter_h():
dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
if save_results:
f_summary = open(dir_output + fn_output_summary, 'a')
csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
dis_k_gi2sm_mean, dis_k_gi2gm_mean])
f_summary.close()
print('\ncomplete.')


def _append_csv_row(path, row):
    """Append a single row to the CSV file at ``path``."""
    # newline='' is what the csv module asks for, so that the writer's own
    # line terminator is not translated a second time by the text layer.
    with open(path, 'a', newline='') as f_out:
        csv.writer(f_out).writerow(row)


def _tally_relation(counts, better_repeats, before, after, repeat):
    """Record how ``after`` compares with ``before`` for one repeat.

    ``counts`` is a three-slot list updated in place: slot 0 is incremented
    when ``after`` is smaller than ``before``, slot 1 when they are equal and
    slot 2 when ``after`` is larger.  When ``after`` is smaller, ``repeat`` is
    also appended to ``better_repeats``.  If none of the comparisons holds
    (e.g. a NaN operand) nothing is recorded.
    """
    if before > after:
        counts[0] += 1
        better_repeats.append(repeat)
    elif before == after:
        counts[1] += 1
    elif before < after:
        counts[2] += 1


def xp_letter_h():
    """Run the median-graph experiment on the Letter (HIGH) dataset.

    The dataset is loaded from hard-coded local paths and grouped by class
    label.  For every class, and for each repeat, all graphs of the class are
    passed to ``median_on_k_closest_graphs``; the returned SOD / kernel
    distance values for the set median (SM), the generalized median (GM) and
    the closest dataset graph (gi) are compared and tallied.  The median
    graphs written by the GED run are copied to ``results/xp_letter_h/medians/``
    and drawn.  CSV result files are only written when ``save_results`` is
    switched on inside the function.

    Takes no arguments and returns nothing; all configuration is local.
    """
    ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
          'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
    for G in Gn:
        reform_attributes(G)

    gkernel = 'structuralspkernel'
    node_label = None
    edge_label = None
    ds_name = 'letter-h'
    dir_output = 'results/xp_letter_h/'
    save_results = False
    repeats = 1
    # k_list = range(2, 11)
    k_list = [150]
    fit_method = 'k-graphs'

    # get indices by classes.
    y_idx = get_same_item_indices(y_all)

    if save_results:
        # create result files; only the header rows are written here, the
        # data rows are appended as the experiment progresses.
        fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
        _append_csv_row(dir_output + fn_output_detail,
                        ['dataset', 'graph kernel', 'fit method', 'k',
                         'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
                         'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
                         'dis_k gi -> GM', 'median set'])
        fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
        _append_csv_row(dir_output + fn_output_summary,
                        ['dataset', 'graph kernel', 'fit method', 'k',
                         'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
                         'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
                         'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
                         '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
                         'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
                         'repeats better dis_k gi -> GM'])

    # One reproducible seed per repeat.
    random.seed(1)
    rdn_seed_list = random.sample(range(0, repeats * 100), repeats)

    # Files the GED run leaves behind; they are copied after every repeat.
    fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
    fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'

    for k in k_list:
        print('\n--------- k =', k, '----------')

        # Per-class means, one entry per class label.
        sod_sm_mean_list = []
        sod_gm_mean_list = []
        dis_k_sm_mean_list = []
        dis_k_gm_mean_list = []
        dis_k_gi_min_mean_list = []

        for y, values in y_idx.items():
            print('\ny =', y)
            # NOTE: k is overridden so that every graph of the class is used;
            # the value taken from k_list only shows up in the banner above.
            k = len(values)

            sod_sm_list = []
            sod_gm_list = []
            dis_k_sm_list = []
            dis_k_gm_list = []
            dis_k_gi_min_list = []
            nb_sod_sm2gm = [0, 0, 0]
            nb_dis_k_sm2gm = [0, 0, 0]
            nb_dis_k_gi2sm = [0, 0, 0]
            nb_dis_k_gi2gm = [0, 0, 0]
            repeats_better_sod_sm2gm = []
            repeats_better_dis_k_sm2gm = []
            repeats_better_dis_k_gi2sm = []
            repeats_better_dis_k_gi2gm = []

            for repeat in range(repeats):
                print('\nrepeat =', repeat)
                random.seed(rdn_seed_list[repeat])
                median_set_idx_idx = random.sample(range(0, len(values)), k)
                median_set_idx = [values[idx] for idx in median_set_idx_idx]
                print('median set: ', median_set_idx)
                Gn_median = [Gn[g] for g in values]

                sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
                    = median_on_k_closest_graphs(Gn_median, node_label, edge_label,
                        gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
                        edit_costs=None, group_min=median_set_idx_idx,
                        dataset='Letter', parallel=False)

                # write result detail.
                sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
                dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
                dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
                dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
                if save_results:
                    _append_csv_row(dir_output + fn_output_detail,
                                    [ds_name, gkernel, fit_method, k,
                                     y, repeat,
                                     sod_sm, sod_gm, dis_k_sm, dis_k_gm,
                                     dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
                                     dis_k_gi2gm, median_set_idx])

                # compute result summary.
                sod_sm_list.append(sod_sm)
                sod_gm_list.append(sod_gm)
                dis_k_sm_list.append(dis_k_sm)
                dis_k_gm_list.append(dis_k_gm)
                dis_k_gi_min_list.append(dis_k_gi_min)
                # SOD SM -> GM
                _tally_relation(nb_sod_sm2gm, repeats_better_sod_sm2gm,
                                sod_sm, sod_gm, repeat)
                # dis_k SM -> GM
                _tally_relation(nb_dis_k_sm2gm, repeats_better_dis_k_sm2gm,
                                dis_k_sm, dis_k_gm, repeat)
                # dis_k gi -> SM
                _tally_relation(nb_dis_k_gi2sm, repeats_better_dis_k_gi2sm,
                                dis_k_gi_min, dis_k_sm, repeat)
                # dis_k gi -> GM
                _tally_relation(nb_dis_k_gi2gm, repeats_better_dis_k_gi2gm,
                                dis_k_gi_min, dis_k_gm, repeat)

                # save median graphs.
                fn_suffix = fit_method + '.k' + str(int(k)) + '.y' + y \
                    + '.repeat' + str(repeat)
                fn_pre_sm_new = dir_output + 'medians/set_median.' + fn_suffix
                copyfile(fname_sm, fn_pre_sm_new + '.gxl')
                fn_pre_gm_new = dir_output + 'medians/gen_median.' + fn_suffix
                copyfile(fname_gm, fn_pre_gm_new + '.gxl')
                # Work on a copy so the dataset graph itself is not modified.
                G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
                reform_attributes(G_best_kernel)
                fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fn_suffix
                saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')

                # plot median graphs.
                set_median = loadGXL(fn_pre_sm_new + '.gxl')
                gen_median = loadGXL(fn_pre_gm_new + '.gxl')
                draw_Letter_graph(set_median, fn_pre_sm_new)
                draw_Letter_graph(gen_median, fn_pre_gm_new)
                draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)

            # write result summary for each letter.
            sod_sm_mean_list.append(np.mean(sod_sm_list))
            sod_gm_mean_list.append(np.mean(sod_gm_list))
            dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
            dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
            dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
            sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
            dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
            dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
            dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
            if save_results:
                _append_csv_row(dir_output + fn_output_summary,
                                [ds_name, gkernel, fit_method, k, y,
                                 sod_sm_mean_list[-1], sod_gm_mean_list[-1],
                                 dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
                                 dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
                                 dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
                                 nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
                                 repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
                                 repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])

        # write result summary over all letters.
        sod_sm_mean = np.mean(sod_sm_mean_list)
        sod_gm_mean = np.mean(sod_gm_mean_list)
        dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
        dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
        # Average the per-class means, like the four values above (not the
        # per-repeat list of whichever class happened to be processed last).
        dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)
        sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
        dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
        dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
        dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
        if save_results:
            # NOTE: k here is the size of the last class processed, because
            # it is reassigned inside the class loop.
            _append_csv_row(dir_output + fn_output_summary,
                            [ds_name, gkernel, fit_method, k, 'all',
                             sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
                             dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
                             dis_k_gi2sm_mean, dis_k_gi2gm_mean])

    print('\ncomplete.')
@@ -243,4 +472,5 @@ def draw_Letter_graph(graph, file_prefix):

if __name__ == "__main__":
    # The plain Letter-h experiment is kept here, disabled; only the
    # LETTER2-cost variant is run when the module is executed as a script.
    # xp_letter_h()
    xp_letter_h_LETTER2_cost()

Loading…
Cancel
Save