diff --git a/preimage/fitDistance.py b/preimage/fitDistance.py
index f07c3f2..458a102 100644
--- a/preimage/fitDistance.py
+++ b/preimage/fitDistance.py
@@ -15,24 +15,28 @@ import time
 import random
 from scipy import optimize
+from scipy.optimize import minimize
 import cvxpy as cp
 
 import sys
-#sys.path.insert(0, "../")
-from ged import GED, get_nb_edit_operations
-from utils import kernel_distance_matrix
+sys.path.insert(0, "../")
+from preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter
+from preimage.utils import kernel_distance_matrix
 
 
-def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4,
+def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max,
                                params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
                                            'stabilizer': None},
                                init_costs=[3, 3, 1, 3, 3, 1],
+                               dataset='monoterpenoides',
                                parallel=True):
+    dataset = dataset.lower()
+    
    # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
 #    random.seed(1)
 #    cost_rdm = random.sample(range(1, 10), 6)
 #    init_costs = cost_rdm + [0]
 #    init_costs = cost_rdm
-    init_costs = [3, 3, 1, 3, 3, 1]
+#    init_costs = [3, 3, 1, 3, 3, 1]
 #    init_costs = [i * 0.01 for i in cost_rdm] + [0]
 #    init_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
 #    init_costs = [0, 0, 0.9544, 0.026, 0.0196, 0]
@@ -51,8 +55,10 @@ def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4
     # init ged.
     print('\ninitial:')
     time0 = time.time()
+    params_ged['dataset'] = dataset
     params_ged['edit_cost_constant'] = init_costs
     ged_vec_init, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
+                                                            dataset,
                                                             parallel=parallel)
     residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
     time_list = [time.time() - time0]
@@ -67,20 +73,21 @@ def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4
         time0 = time.time()
         # "fit" geds to distances in feature space by tuning edit costs using the
         # Least Squares Method.
-        edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec)
+        edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec,
+                                                dataset=dataset, cost=params_ged['cost'])
         for i in range(len(edit_costs_new)):
+            if -1e-9 <= edit_costs_new[i] <= 1e-9:
+                edit_costs_new[i] = 0
             if edit_costs_new[i] < 0:
-                if edit_costs_new[i] > -1e-9:
-                    edit_costs_new[i] = 0
-                else:
-                    raise ValueError('The edit cost is negative.')
+                raise ValueError('The edit cost is negative.')
 #        for i in range(len(edit_costs_new)):
 #            if edit_costs_new[i] < 0:
 #                edit_costs_new[i] = 0
 
         # compute new GEDs and numbers of edit operations.
-        params_ged['edit_cost_constant'] = edit_costs_new
-        ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
+        params_ged['edit_cost_constant'] = edit_costs_new # np.array([edit_costs_new[0], edit_costs_new[1], 0.75])
+        ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
+                                                           dataset,
                                                            parallel=parallel)
         residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
         time_list.append(time.time() - time0)
@@ -94,7 +101,8 @@ def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, k=4
            time_list, nb_cost_mat_list
 
 
-def compute_geds(Gn, params_ged, parallel=False):
+def compute_geds(Gn, params_ged, dataset, parallel=False):
+    get_nb_eo = get_nb_edit_operations_letter if dataset == 'letter' else get_nb_edit_operations
     ged_mat = np.zeros((len(Gn), len(Gn)))
     if parallel:
 #        print('parallel')
@@ -112,7 +120,7 @@ def compute_geds(Gn, params_ged, parallel=False):
         def init_worker(gn_toshare):
             global G_gn
             G_gn = gn_toshare
-        do_partial = partial(_wrapper_compute_ged_parallel, params_ged)
+        do_partial = partial(_wrapper_compute_ged_parallel, params_ged, get_nb_eo)
         pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,))
         iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                         desc='computing GEDs', file=sys.stdout)
@@ -138,26 +146,146 @@ def compute_geds(Gn, params_ged, parallel=False):
                 ged_vec.append(dis)
                 ged_mat[i][j] = dis
                 ged_mat[j][i] = dis
-                n_eo_tmp = get_nb_edit_operations(Gn[i], Gn[j], pi_forward, pi_backward)
+                n_eo_tmp = get_nb_eo(Gn[i], Gn[j], pi_forward, pi_backward)
                 n_edit_operations.append(n_eo_tmp)
     return ged_vec, ged_mat, n_edit_operations
 
 
-def _wrapper_compute_ged_parallel(params_ged, itr):
+def _wrapper_compute_ged_parallel(params_ged, get_nb_eo, itr):
     i = itr[0]
     j = itr[1]
-    dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged)
+    dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged, get_nb_eo)
     return i, j, dis, n_eo_tmp
 
 
-def _compute_ged_parallel(g1, g2, params_ged):
+def _compute_ged_parallel(g1, g2, params_ged, get_nb_eo):
     dis, pi_forward, pi_backward = GED(g1, g2, **params_ged)
-    n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward)
+    n_eo_tmp = get_nb_eo(g1, g2, pi_forward, pi_backward) # [0,0,0,0,0,0]
     return dis, n_eo_tmp
 
 
-def update_costs(nb_cost_mat, dis_k_vec):
+def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides',
+                 cost='CONSTANT', rw_constraints='2constraints'):
+    if dataset.lower() == 'letter':
+        if cost == 'LETTER':
+            pass
+#            # method 1: set alpha automatically, just tune c_vir and c_eir by
+#            # LMS using cvxpy.
+#            alpha = 0.5
+#            coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec)
+##            if np.count_nonzero(nb_cost_mat[:,4]) == 0:
+##                alpha = 0.75
+##            else:
+##                alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0])
+##            alpha = alpha * 0.99
+#            param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1])
+#            param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5])
+#            nb_cost_mat_new = np.column_stack((param_vir, param_eir))
+#            dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3]
+#            
+#            x = cp.Variable(nb_cost_mat_new.shape[1])
+#            cost = cp.sum_squares(nb_cost_mat_new * x - dis_new)
+#            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
+#            prob = cp.Problem(cp.Minimize(cost), constraints)
+#            prob.solve()
+#            edit_costs_new = x.value
+#            edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha])
+#            residual = np.sqrt(prob.value)
+            
+#            # method 2: tune c_vir, c_eir and alpha by nonlinear programming by
+#            # scipy.optimize.minimize.
+#            w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
+#            w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
+#            w2 = nb_cost_mat[:,3]
+#            w3 = dis_k_vec
+#            func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
+#                                         + w2 * x[2] - w3 * x[3]) ** 2)
+#            bounds = ((0, None), (0., None), (0.5, 0.5), (0, None))
+#            res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds)
+#            edit_costs_new = res.x[0:3]
+#            residual = res.fun
+            
+            # method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy.
+            
+            
+#            # method 4: tune c_vir, c_eir and alpha by QP function
+#            # scipy.optimize.least_squares. An initial guess is required.
+#            w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
+#            w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
+#            w2 = nb_cost_mat[:,3]
+#            w3 = dis_k_vec
+#            func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
+#                              + w2 * x[2] - w3 * x[3]) ** 2
+#            res = optimize.root(func, [0.9, 1.7, 0.75, 100])
+#            edit_costs_new = res.x
+#            residual = None
+        elif cost == 'LETTER2':
+#            # 1. if c_vi != c_vr, c_ei != c_er.
+#            nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
+#            x = cp.Variable(nb_cost_mat_new.shape[1])
+#            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
+##            # 1.1 no constraints.
+##            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
+#            # 1.2 c_vs <= c_vi + c_vr.
+#            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
+#                           np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
+##            # 2. if c_vi == c_vr, c_ei == c_er.
+##            nb_cost_mat_new = nb_cost_mat[:,[0,3,4]]
+##            nb_cost_mat_new[:,0] += nb_cost_mat[:,1]
+##            nb_cost_mat_new[:,2] += nb_cost_mat[:,5]
+##            x = cp.Variable(nb_cost_mat_new.shape[1])
+##            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
+##            # 2.1 no constraints.
+##            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
+###            # 2.2 c_vs <= c_vi + c_vr.
+###            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
+###                           np.array([2.0, -1.0, 0.0]).T@x >= 0.0]
+#            
+#            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
+#            prob.solve()
+#            edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
+#            edit_costs_new = np.array(edit_costs_new)
+#            residual = np.sqrt(prob.value)
+            if rw_constraints == 'inequality':
+                # c_vs <= c_vi + c_vr.
+                nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
+                x = cp.Variable(nb_cost_mat_new.shape[1])
+                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
+                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
+                               np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
+                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
+                prob.solve()
+                edit_costs_new = x.value
+                residual = np.sqrt(prob.value)
+            elif rw_constraints == '2constraints':
+                # c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er.
+                nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
+                x = cp.Variable(nb_cost_mat_new.shape[1])
+                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
+                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
+                               np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
+                               np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0,
+                               np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
+                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
+                prob.solve()
+                edit_costs_new = x.value
+                residual = np.sqrt(prob.value)
+#        elif method == 'inequality_modified':
+#            # c_vs <= c_vi + c_vr.
+#            nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
+#            x = cp.Variable(nb_cost_mat_new.shape[1])
+#            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
+#            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
+#                           np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
+#            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
+#            prob.solve()
+#            # use same costs for insertion and removal rather than the fitted costs.
+#            edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
+#            edit_costs_new = np.array(edit_costs_new)
+#            residual = np.sqrt(prob.value)
+        
+    else:
 #    # method 1: simple least square method.
 #    edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
 #                                                     rcond=None)
 
@@ -181,16 +309,16 @@ def update_costs(nb_cost_mat, dis_k_vec):
 #    G = -1 * np.identity(nb_cost_mat.shape[1])
 #    h = np.array([0 for i in range(nb_cost_mat.shape[1])])
-    x = cp.Variable(nb_cost_mat.shape[1])
-    cost = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
-    constraints = [x >= [0.0001 for i in range(nb_cost_mat.shape[1])],
-#                   np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-                   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
-                   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
-    prob = cp.Problem(cp.Minimize(cost), constraints)
-    prob.solve()
-    edit_costs_new = x.value
-    residual = np.sqrt(prob.value)
+        x = cp.Variable(nb_cost_mat.shape[1])
+        cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
+        constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
+    #                   np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
+                       np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
+                       np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
+        prob = cp.Problem(cp.Minimize(cost_fun), constraints)
+        prob.solve()
+        edit_costs_new = x.value
+        residual = np.sqrt(prob.value)
     
 #    method 4:
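The core of the fitDistance.py change is the constrained least-squares step in update_costs(): stack the per-pair edit-operation counts into a matrix, then fit non-negative edit costs so that the predicted GEDs approximate the distances in kernel space, subject to triangle-style constraints (a substitution may not cost more than a removal plus an insertion). A minimal self-contained sketch of that optimization follows; the data is synthetic and only the solver pattern mirrors the patch (recent cvxpy versions use @ where the older cvxpy in this code uses *).

import numpy as np
import cvxpy as cp

rng = np.random.default_rng(0)
nb_cost_mat = rng.integers(0, 5, size=(20, 6)).astype(float)  # edit-operation counts per graph pair
dis_k_vec = rng.random(20) * 10                               # distances in kernel space

x = cp.Variable(nb_cost_mat.shape[1])  # c_vi, c_vr, c_vs, c_ei, c_er, c_es
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
constraints = [x >= 0.0,
               # c_vs <= c_vi + c_vr and c_es <= c_ei + c_er.
               np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]) @ x >= 0.0,
               np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]) @ x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
print('fitted costs:', x.value, ' residual:', np.sqrt(prob.value))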
""" - def convertGraph(G): + def convertGraph(G, dataset): """Convert a graph to the proper NetworkX format that can be recognized by library gedlibpy. """ G_new = nx.Graph() - for nd, attrs in G.nodes(data=True): - G_new.add_node(str(nd), chem=attrs['atom']) -# G_new.add_node(str(nd), x=str(attrs['attributes'][0]), -# y=str(attrs['attributes'][1])) - for nd1, nd2, attrs in G.edges(data=True): -# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) - G_new.add_edge(str(nd1), str(nd2)) + if dataset == 'monoterpenoides': + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), chem=attrs['atom']) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) + elif dataset == 'letter': + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), x=str(attrs['attributes'][0]), + y=str(attrs['attributes'][1])) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2)) + else: + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), chem=attrs['atom']) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) +# G_new.add_edge(str(nd1), str(nd2)) return G_new - + + + dataset = dataset.lower() if lib == 'gedlibpy': gedlibpy.restart_env() - gedlibpy.add_nx_graph(convertGraph(g1), "") - gedlibpy.add_nx_graph(convertGraph(g2), "") + gedlibpy.add_nx_graph(convertGraph(g1, dataset), "") + gedlibpy.add_nx_graph(convertGraph(g2, dataset), "") listID = gedlibpy.get_all_graph_ids() gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant) @@ -320,6 +333,60 @@ def get_nb_edit_operations(g1, g2, forward_map, backward_map): # one of the nodes is removed, thus the edge is removed. if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf: n_er += 1 + # corresponding edge is in g2. + elif (forward_map[idx1], forward_map[idx2]) in g2.edges(): + nb_edges2_cnted += 1 + # edge labels are different. + if g2.edges[((forward_map[idx1], forward_map[idx2]))]['bond_type'] \ + != g1.edges[(n1, n2)]['bond_type']: + n_es += 1 + elif (forward_map[idx2], forward_map[idx1]) in g2.edges(): + nb_edges2_cnted += 1 + # edge labels are different. + if g2.edges[((forward_map[idx2], forward_map[idx1]))]['bond_type'] \ + != g1.edges[(n1, n2)]['bond_type']: + n_es += 1 + # corresponding nodes are in g2, however the edge is removed. + else: + n_er += 1 + n_ei = nx.number_of_edges(g2) - nb_edges2_cnted + + return n_vi, n_vr, n_vs, n_ei, n_er, n_es + + +def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map): + """Compute the number of each edit operations. + """ + n_vi = 0 + n_vr = 0 + n_vs = 0 + sod_vs = 0 + n_ei = 0 + n_er = 0 + + nodes1 = [n for n in g1.nodes()] + for i, map_i in enumerate(forward_map): + if map_i == np.inf: + n_vr += 1 + else: + n_vs += 1 + diff_x = float(g1.nodes[i]['x']) - float(g2.nodes[map_i]['x']) + diff_y = float(g1.nodes[i]['y']) - float(g2.nodes[map_i]['y']) + sod_vs += np.sqrt(np.square(diff_x) + np.square(diff_y)) + for map_i in backward_map: + if map_i == np.inf: + n_vi += 1 + +# idx_nodes1 = range(0, len(node1)) + + edges1 = [e for e in g1.edges()] + nb_edges2_cnted = 0 + for n1, n2 in edges1: + idx1 = nodes1.index(n1) + idx2 = nodes1.index(n2) + # one of the nodes is removed, thus the edge is removed. + if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf: + n_er += 1 # corresponding edge is in g2. Edge label is not considered. 
diff --git a/preimage/iam.py b/preimage/iam.py
index 0a63b98..19b646c 100644
--- a/preimage/iam.py
+++ b/preimage/iam.py
@@ -436,7 +436,8 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
     return G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median
 
 
-def iam_bash(Gn_names, edit_cost_constant, dataset='monoterpenoides',
+def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT',
+             dataset='monoterpenoides',
              graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/py-graph/datasets/monoterpenoides/'):
     """Compute the iam by c++ implementation (gedlib) through bash.
     """
@@ -467,12 +468,12 @@ def iam_bash(Gn_names, edit_cost_constant, dataset='monoterpenoides',
 #    graph_dir = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/gxl'
     
-    command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n'
+    command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n'
     command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
     command += 'export LD_LIBRARY_PATH\n'
     command += 'cd \'/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/bin\'\n'
     command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \
-        + ' \'' + graph_dir + '\' '
+        + ' \'' + graph_dir + '\' ' + ' ' + cost + ' '
     if edit_cost_constant is None:
         command += 'None'
     else:
@@ -484,7 +485,7 @@ def iam_bash(Gn_names, edit_cost_constant, dataset='monoterpenoides',
     output = stream.readlines()
 #    print(output)
     sod_sm = float(output[0].strip())
-    sod_gm= float(output[1].strip())
+    sod_gm = float(output[1].strip())
     
     fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
     fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
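iam_bash() composes a small shell script (export the GEDLIB paths, cd into the test binary's directory, run iam_for_python_bash with the dataset name, collection file, graph directory and now the edit-cost model name), then reads two lines from its stdout: the SOD of the set median followed by the SOD of the generalized median. A hedged sketch of that invocation-and-parsing step using subprocess in place of the patch's os.popen(); the command string is assumed to be assembled exactly as in iam_bash().

import subprocess

def run_iam(command):
    """Run the composed shell script and parse the two SOD values it prints."""
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    lines = result.stdout.splitlines()
    sod_sm = float(lines[0].strip())  # SOD of the set median
    sod_gm = float(lines[1].strip())  # SOD of the generalized median
    return sod_sm, sod_gm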
diff --git a/preimage/test_k_closest_graphs.py b/preimage/test_k_closest_graphs.py
index 8d7d27a..39301aa 100644
--- a/preimage/test_k_closest_graphs.py
+++ b/preimage/test_k_closest_graphs.py
@@ -31,8 +31,9 @@ from fitDistance import fit_GED_to_kernel_distance
 def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_method,
                                graph_dir='/media/ljia/DATA/research-repo/codes/Linlin/py-graph/datasets/monoterpenoides/',
                                edit_costs=None, group_min=None, dataset='monoterpenoides',
-                               parallel=True):
-    
+                               cost='CONSTANT', parallel=True):
+    dataset = dataset.lower()
+    
 #    # compute distances in kernel space.
 #    dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
 #                                              Kmatrix=None, gkernel=gkernel)
@@ -50,32 +51,53 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho
 #    group_min = (12, 13, 22, 29) # closest w.r.t path kernel
 #    group_min = (77, 85, 160, 171) # closest w.r.t ged
 #    group_min = (0,1,2,3,4,5,6,7,8,9,10,11) # closest w.r.t treelet kernel
-    
     Gn_median = [Gn[g].copy() for g in group_min]
     
     # fit edit costs.
     if fit_method == 'random': # random
-        edit_cost_constant = random.sample(range(1, 10), 6)
+        if cost == 'LETTER':
+            edit_cost_constant = random.sample(range(1, 10), 3)
+            edit_cost_constant = [item * 0.1 for item in edit_cost_constant]
+        elif cost == 'LETTER2':
+            random.seed(time.time())
+            edit_cost_constant = random.sample(range(1, 10), 5)
+#            edit_cost_constant = [item * 0.1 for item in edit_cost_constant]
+        else:
+            edit_cost_constant = random.sample(range(1, 10), 6)
         print('edit costs used:', edit_cost_constant)
     elif fit_method == 'expert': # expert
         edit_cost_constant = [3, 3, 1, 3, 3, 1]
     elif fit_method == 'k-graphs':
         itr_max = 6
-        algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-        params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
-                      'algo_options': algo_options, 'stabilizer': None}
+        if cost == 'LETTER':
+            init_costs = [0.9, 1.7, 0.75]
+        elif cost == 'LETTER2':
+            init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
+        else:
+            init_costs = [3, 3, 1, 3, 3, 1]
+        algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
+        params_ged = {'lib': 'gedlibpy', 'cost': cost, 'method': 'IPFP',
+                      'algo_options': algo_options, 'stabilizer': None}
         # fit on k-graph subset
         edit_cost_constant, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn_median,
-            node_label, edge_label, gkernel, itr_max, params_ged=params_ged, parallel=True)
+            node_label, edge_label, gkernel, itr_max, params_ged=params_ged,
+            init_costs=init_costs, dataset=dataset, parallel=True)
     elif fit_method == 'whole-dataset':
         itr_max = 6
-        algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-        params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
+        if cost == 'LETTER':
+            init_costs = [0.9, 1.7, 0.75]
+        elif cost == 'LETTER2':
+            init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
+        else:
+            init_costs = [3, 3, 1, 3, 3, 1]
+        algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
+        params_ged = {'lib': 'gedlibpy', 'cost': cost, 'method': 'IPFP',
                       'algo_options': algo_options, 'stabilizer': None}
         # fit on all subset
         edit_cost_constant, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn,
-            node_label, edge_label, gkernel, itr_max, params_ged=params_ged, parallel=True)
+            node_label, edge_label, gkernel, itr_max, params_ged=params_ged,
+            init_costs=init_costs, dataset=dataset, parallel=True)
     elif fit_method == 'precomputed':
         edit_cost_constant = edit_costs
@@ -83,14 +105,17 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho
     # compute set median and gen median using IAM (C++ through bash).
     group_fnames = [Gn[g].graph['filename'] for g in group_min]
     sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constant,
-                                                  graph_dir=graph_dir, dataset=dataset)
+                                                  cost=cost, graph_dir=graph_dir,
+                                                  dataset=dataset)
     
     # compute distances in kernel space.
     Gn_median = [Gn[g].copy() for g in group_min]
     set_median = loadGXL(fname_sm)
     gen_median = loadGXL(fname_gm)
-    if dataset == 'Letter':
+#    print(gen_median.nodes(data=True))
+#    print(gen_median.edges(data=True))
+    if dataset == 'letter':
         for g in Gn_median:
             reform_attributes(g)
         reform_attributes(set_median)
@@ -98,16 +123,19 @@ def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_metho
     # compute distance in kernel space for set median.
     Kmatrix_sm = compute_kernel([set_median] + Gn_median, gkernel,
-                                None if dataset == 'Letter' else 'chem',
-                                None if dataset == 'Letter' else 'valence',
+                                None if dataset == 'letter' else 'chem',
+                                None if dataset == 'letter' else 'valence',
                                 False)
     dis_k_sm = dis_gstar(0, range(1, 1+len(Gn_median)),
                          [1 / len(Gn_median)] * len(Gn_median), Kmatrix_sm, withterm3=False)
-    
+#    print(gen_median.nodes(data=True))
+#    print(gen_median.edges(data=True))
+#    print(set_median.nodes(data=True))
+#    print(set_median.edges(data=True))
     
     # compute distance in kernel space for generalized median.
     Kmatrix_gm = compute_kernel([gen_median] + Gn_median, gkernel,
-                                None if dataset == 'Letter' else 'chem',
-                                None if dataset == 'Letter' else 'valence',
+                                None if dataset == 'letter' else 'chem',
+                                None if dataset == 'letter' else 'valence',
                                 False)
     dis_k_gm = dis_gstar(0, range(1, 1+len(Gn_median)),
                          [1 / len(Gn_median)] * len(Gn_median), Kmatrix_gm, withterm3=False)
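dis_gstar(..., withterm3=False), called twice above, measures the distance in kernel space between a candidate median (stored at index 0 of the kernel matrix) and the median set: with uniform weights 1/N the squared distance reduces to k(g0, g0) - (2/N) * sum_i k(g0, g_i), the third term (a constant over the median set) being dropped. A sketch that mirrors this call pattern, not the exact implementation in preimage.utils:

import numpy as np

def dis_to_median_set(Kmatrix):
    """Kmatrix: kernel matrix of [candidate] + median set, candidate at row 0."""
    n = Kmatrix.shape[0] - 1
    alpha = np.full(n, 1.0 / n)               # uniform weights, as in the calls above
    term1 = Kmatrix[0, 0]
    term2 = 2 * np.sum(alpha * Kmatrix[0, 1:])
    return np.sqrt(max(term1 - term2, 0))     # clamp tiny negative round-off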
diff --git a/preimage/utils.py b/preimage/utils.py
index 51d4edf..ed6959e 100644
--- a/preimage/utils.py
+++ b/preimage/utils.py
@@ -61,8 +61,8 @@ def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose):
                    {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                    n_jobs=multiprocessing.cpu_count(), verbose=verbose)
     elif graph_kernel == 'treeletkernel':
-#        pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
-        pkernel = functools.partial(gaussiankernel, gamma=1e-6)
+        pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
+#        pkernel = functools.partial(gaussiankernel, gamma=1e-6)
         mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
         Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label,
                                    sub_kernel=pkernel,
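The utils.py hunk swaps the treelet sub-kernel back from a Gaussian to a degree-2 polynomial kernel. Both are scalar kernels applied to pairs of treelet feature vectors; the stand-in definitions below only illustrate the two choices and are not pygraph's exact implementations.

import functools
import numpy as np

def polynomialkernel(x, y, d=1, c=0):
    # (x . y + c)^d; assumed signature, for illustration only.
    return (np.dot(x, y) + c) ** d

def gaussiankernel(x, y, gamma=1.0):
    # exp(-gamma * ||x - y||^2); assumed signature, for illustration only.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.exp(-gamma * np.dot(diff, diff))

pkernel_poly = functools.partial(polynomialkernel, d=2, c=1e5)  # now active
pkernel_gauss = functools.partial(gaussiankernel, gamma=1e-6)   # commented out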
diff --git a/preimage/xp_letter_h.py b/preimage/xp_letter_h.py
index 71496e4..4a707af 100644
--- a/preimage/xp_letter_h.py
+++ b/preimage/xp_letter_h.py
@@ -19,11 +19,13 @@ from preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_at
 from preimage.utils import get_same_item_indices
 from preimage.find_best_k import getRelations
 
-def xp_letter_h():
-    ds = {'name': 'Letter-high',
-          'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
+
+def xp_letter_h_LETTER2_cost():
+    ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
           'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
     Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
+    for G in Gn:
+        reform_attributes(G)
 #    ds = {'name': 'Letter-high',
 #          'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
 #    Gn, y_all = loadDataset(ds['dataset'])
@@ -33,32 +35,35 @@ def xp_letter_h():
     edge_label = None
     ds_name = 'letter-h'
     dir_output = 'results/xp_letter_h/'
+    save_results = True
+    cost = 'LETTER2'
     
     repeats = 1
 #    k_list = range(2, 11)
     k_list = [150]
-    fit_method = 'precomputed'
+    fit_method = 'k-graphs'
     # get indices by classes.
     y_idx = get_same_item_indices(y_all)
-    # create result files.
-    fn_output_detail = 'results_detail.' + fit_method + '.csv'
-    f_detail = open(dir_output + fn_output_detail, 'a')
-    csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
-              'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-              'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
-              'dis_k gi -> GM', 'median set'])
-    f_detail.close()
-    fn_output_summary = 'results_summary.csv'
-    f_summary = open(dir_output + fn_output_summary, 'a')
-    csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
-              'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-              'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
-              'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
-              '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
-              'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
-              'repeats better dis_k gi -> GM'])
-    f_summary.close()
+    if save_results:
+        # create result files.
+        fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
+        f_detail = open(dir_output + fn_output_detail, 'a')
+        csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
+                  'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
+                  'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
+                  'dis_k gi -> GM', 'median set'])
+        f_detail.close()
+        fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
+        f_summary = open(dir_output + fn_output_summary, 'a')
+        csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
+                  'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
+                  'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
+                  'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
+                  '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
+                  'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
+                  'repeats better dis_k gi -> GM'])
+        f_summary.close()
     
     random.seed(1)
     rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
@@ -82,11 +87,11 @@ def xp_letter_h():
         
         for i, (y, values) in enumerate(y_idx.items()):
             print('\ny =', y)
-#            y = 'I'
+#            y = 'F'
 #            values = y_idx[y]
+#            values = values[0:10]
             
-#            k = len(values)
-#            k = kkk
+            k = len(values)
             
             sod_sm_list = []
             sod_gm_list = []
@@ -114,20 +119,21 @@ def xp_letter_h():
                     = median_on_k_closest_graphs(Gn_median, node_label, edge_label,
                                                  gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
                                                  edit_costs=None, group_min=median_set_idx_idx,
-                                                 dataset='Letter', parallel=False)
+                                                 dataset='Letter', cost=cost, parallel=False)
                 
                 # write result detail.
                 sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
                 dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
                 dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
                 dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
-                f_detail = open(dir_output + fn_output_detail, 'a')
-                csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
-                          y, repeat,
-                          sod_sm, sod_gm, dis_k_sm, dis_k_gm,
-                          dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
-                          dis_k_gi2gm, median_set_idx])
-                f_detail.close()
+                if save_results:
+                    f_detail = open(dir_output + fn_output_detail, 'a')
+                    csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
+                              y, repeat,
+                              sod_sm, sod_gm, dis_k_sm, dis_k_gm,
+                              dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
+                              dis_k_gi2gm, median_set_idx])
+                    f_detail.close()
                 
                 # compute result summary.
                 sod_sm_list.append(sod_sm)
@@ -170,14 +176,17 @@ def xp_letter_h():
                 # save median graphs.
                 fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
-                fn_pre_sm_new = dir_output + 'medians/set_median.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
+                fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
                 copyfile(fname_sm, fn_pre_sm_new + '.gxl')
                 fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
-                fn_pre_gm_new = dir_output + 'medians/gen_median.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
+                fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
                 copyfile(fname_gm, fn_pre_gm_new + '.gxl')
                 G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
                 reform_attributes(G_best_kernel)
-                fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
+                fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
+                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
                 saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')
                 
                 # plot median graphs.
@@ -197,16 +206,17 @@ def xp_letter_h():
             dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
             dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
             dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
-            f_summary = open(dir_output + fn_output_summary, 'a')
-            csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
-                       sod_sm_mean_list[-1], sod_gm_mean_list[-1],
-                       dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
-                       dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
-                       dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
-                       nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
-                       repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
-                       repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
-            f_summary.close()
+            if save_results:
+                f_summary = open(dir_output + fn_output_summary, 'a')
+                csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
+                           sod_sm_mean_list[-1], sod_gm_mean_list[-1],
+                           dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
+                           dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
+                           dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
+                           nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
+                           repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
+                           repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
+                f_summary.close()
         
         # write result summary for each letter.
@@ -219,13 +229,232 @@ def xp_letter_h():
     dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
     dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
     dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
+    if save_results:
+        f_summary = open(dir_output + fn_output_summary, 'a')
+        csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
+                   sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
+                   dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
+                   dis_k_gi2sm_mean, dis_k_gi2gm_mean])
+        f_summary.close()
+    
+    print('\ncomplete.')
+    
+
+def xp_letter_h():
+    ds = {'dataset': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/collections/Letter.xml',
+          'graph_dir': '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/data/datasets/Letter/HIGH/'} # node/edge symb
+    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
+    for G in Gn:
+        reform_attributes(G)
+#    ds = {'name': 'Letter-high',
+#          'dataset': '../datasets/Letter-high/Letter-high_A.txt'} # node/edge symb
+#    Gn, y_all = loadDataset(ds['dataset'])
+#    Gn = Gn[0:50]
+    gkernel = 'structuralspkernel'
+    node_label = None
+    edge_label = None
+    ds_name = 'letter-h'
+    dir_output = 'results/xp_letter_h/'
+    save_results = False
+    
+    repeats = 1
+#    k_list = range(2, 11)
+    k_list = [150]
+    fit_method = 'k-graphs'
+    # get indices by classes.
+    y_idx = get_same_item_indices(y_all)
+    
+    if save_results:
+        # create result files.
+        fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
+        f_detail = open(dir_output + fn_output_detail, 'a')
+        csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k',
+                  'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
+                  'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
+                  'dis_k gi -> GM', 'median set'])
+        f_detail.close()
+        fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
         f_summary = open(dir_output + fn_output_summary, 'a')
+        csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k',
+                  'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
+                  'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM',
+                  'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM',
+                  '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM',
+                  'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM',
+                  'repeats better dis_k gi -> GM'])
         f_summary.close()
+    
+    random.seed(1)
+    rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
+    
+    for k in k_list:
+        print('\n--------- k =', k, '----------')
+        
+        sod_sm_mean_list = []
+        sod_gm_mean_list = []
+        dis_k_sm_mean_list = []
+        dis_k_gm_mean_list = []
+        dis_k_gi_min_mean_list = []
+#        nb_sod_sm2gm = [0, 0, 0]
+#        nb_dis_k_sm2gm = [0, 0, 0]
+#        nb_dis_k_gi2sm = [0, 0, 0]
+#        nb_dis_k_gi2gm = [0, 0, 0]
+#        repeats_better_sod_sm2gm = []
+#        repeats_better_dis_k_sm2gm = []
+#        repeats_better_dis_k_gi2sm = []
+#        repeats_better_dis_k_gi2gm = []
+        
+        for i, (y, values) in enumerate(y_idx.items()):
+            print('\ny =', y)
+#            y = 'N'
+#            values = y_idx[y]
+#            values = values[0:10]
+            
+            k = len(values)
+            
+            sod_sm_list = []
+            sod_gm_list = []
+            dis_k_sm_list = []
+            dis_k_gm_list = []
+            dis_k_gi_min_list = []
+            nb_sod_sm2gm = [0, 0, 0]
+            nb_dis_k_sm2gm = [0, 0, 0]
+            nb_dis_k_gi2sm = [0, 0, 0]
+            nb_dis_k_gi2gm = [0, 0, 0]
+            repeats_better_sod_sm2gm = []
+            repeats_better_dis_k_sm2gm = []
+            repeats_better_dis_k_gi2sm = []
+            repeats_better_dis_k_gi2gm = []
+            
+            for repeat in range(repeats):
+                print('\nrepeat =', repeat)
+                random.seed(rdn_seed_list[repeat])
+                median_set_idx_idx = random.sample(range(0, len(values)), k)
+                median_set_idx = [values[idx] for idx in median_set_idx_idx]
+                print('median set: ', median_set_idx)
+                Gn_median = [Gn[g] for g in values]
+                
+                sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
+                    = median_on_k_closest_graphs(Gn_median, node_label, edge_label,
+                                                 gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
+                                                 edit_costs=None, group_min=median_set_idx_idx,
+                                                 dataset='Letter', parallel=False)
+                
+                # write result detail.
+                sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
+                dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
+                dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
+                dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
+                if save_results:
+                    f_detail = open(dir_output + fn_output_detail, 'a')
+                    csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k,
+                              y, repeat,
+                              sod_sm, sod_gm, dis_k_sm, dis_k_gm,
+                              dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
+                              dis_k_gi2gm, median_set_idx])
+                    f_detail.close()
+                
+                # compute result summary.
+                sod_sm_list.append(sod_sm)
+                sod_gm_list.append(sod_gm)
+                dis_k_sm_list.append(dis_k_sm)
+                dis_k_gm_list.append(dis_k_gm)
+                dis_k_gi_min_list.append(dis_k_gi_min)
+                # # SOD SM -> GM
+                if sod_sm > sod_gm:
+                    nb_sod_sm2gm[0] += 1
+                    repeats_better_sod_sm2gm.append(repeat)
+                elif sod_sm == sod_gm:
+                    nb_sod_sm2gm[1] += 1
+                elif sod_sm < sod_gm:
+                    nb_sod_sm2gm[2] += 1
+                # # dis_k SM -> GM
+                if dis_k_sm > dis_k_gm:
+                    nb_dis_k_sm2gm[0] += 1
+                    repeats_better_dis_k_sm2gm.append(repeat)
+                elif dis_k_sm == dis_k_gm:
+                    nb_dis_k_sm2gm[1] += 1
+                elif dis_k_sm < dis_k_gm:
+                    nb_dis_k_sm2gm[2] += 1
+                # # dis_k gi -> SM
+                if dis_k_gi_min > dis_k_sm:
+                    nb_dis_k_gi2sm[0] += 1
+                    repeats_better_dis_k_gi2sm.append(repeat)
+                elif dis_k_gi_min == dis_k_sm:
+                    nb_dis_k_gi2sm[1] += 1
+                elif dis_k_gi_min < dis_k_sm:
+                    nb_dis_k_gi2sm[2] += 1
+                # # dis_k gi -> GM
+                if dis_k_gi_min > dis_k_gm:
+                    nb_dis_k_gi2gm[0] += 1
+                    repeats_better_dis_k_gi2gm.append(repeat)
+                elif dis_k_gi_min == dis_k_gm:
+                    nb_dis_k_gi2gm[1] += 1
+                elif dis_k_gi_min < dis_k_gm:
+                    nb_dis_k_gi2gm[2] += 1
+                
+                # save median graphs.
+                fname_sm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/set_median.gxl'
+                fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
+                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
+                copyfile(fname_sm, fn_pre_sm_new + '.gxl')
+                fname_gm = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/output/tmp_ged/gen_median.gxl'
+                fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
+                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
+                copyfile(fname_gm, fn_pre_gm_new + '.gxl')
+                G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
+                reform_attributes(G_best_kernel)
+                fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
+                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
+                saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')
+                
+                # plot median graphs.
+                set_median = loadGXL(fn_pre_sm_new + '.gxl')
+                gen_median = loadGXL(fn_pre_gm_new + '.gxl')
+                draw_Letter_graph(set_median, fn_pre_sm_new)
+                draw_Letter_graph(gen_median, fn_pre_gm_new)
+                draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
+            
+            # write result summary for each letter.
+            sod_sm_mean_list.append(np.mean(sod_sm_list))
+            sod_gm_mean_list.append(np.mean(sod_gm_list))
+            dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
+            dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
+            dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
+            sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
+            dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
+            dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
+            dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
+            if save_results:
+                f_summary = open(dir_output + fn_output_summary, 'a')
+                csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
+                           sod_sm_mean_list[-1], sod_gm_mean_list[-1],
+                           dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
+                           dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean,
+                           dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm,
+                           nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm,
+                           repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm,
+                           repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
+                f_summary.close()
+        
+        # write result summary for the whole dataset.
+        sod_sm_mean = np.mean(sod_sm_mean_list)
+        sod_gm_mean = np.mean(sod_gm_mean_list)
+        dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
+        dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
+        dis_k_gi_min_mean = np.mean(dis_k_gi_min_mean_list)
+        sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
+        dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
+        dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
+        dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
+        if save_results:
+            f_summary = open(dir_output + fn_output_summary, 'a')
+            csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
+                       sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
+                       dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean,
+                       dis_k_gi2sm_mean, dis_k_gi2gm_mean])
+            f_summary.close()
     
     print('\ncomplete.')
 
@@ -243,4 +472,5 @@ def draw_Letter_graph(graph, file_prefix):
 
 
 if __name__ == "__main__":
-    xp_letter_h()
\ No newline at end of file
+#    xp_letter_h()
+    xp_letter_h_LETTER2_cost()
\ No newline at end of file
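Both experiment drivers rely on the same bookkeeping pattern: compare two quantities with getRelations(np.sign(a - b)) and accumulate [better, equal, worse] counts such as nb_sod_sm2gm, plus the indices of the repeats where the first quantity improved. A compact equivalent of that tallying; the relation labels are placeholders, as the real wording lives in preimage.find_best_k.

import numpy as np

def relation(sign):
    # placeholder for getRelations(); maps np.sign() output to a label.
    return {-1: 'better', 0: 'equal', 1: 'worse'}[int(sign)]

def tally(a_list, b_list):
    counts = [0, 0, 0]   # [a > b, a == b, a < b], like nb_sod_sm2gm etc.
    better_repeats = []
    for repeat, (a, b) in enumerate(zip(a_list, b_list)):
        if a > b:
            counts[0] += 1
            better_repeats.append(repeat)  # repeats where the second quantity won
        elif a == b:
            counts[1] += 1
        else:
            counts[2] += 1
    return counts, better_repeats

# e.g. SOD SM -> GM: counts[0] counts repeats where sod_sm > sod_gm,
# i.e. where the generalized median improved on the set median.
counts, reps = tally([3.2, 2.8, 2.8], [2.5, 2.8, 3.0])
print(counts, reps)                    # [1, 1, 1] [0]
print(relation(np.sign(2.5 - 3.2)))    # 'better'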