|
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Thu Oct 24 11:50:56 2019
-
- @author: ljia
- """
import os
import sys

from matplotlib import pyplot as plt
import numpy as np
from tqdm import tqdm

from gklearn.utils.graphfiles import loadDataset
from gklearn.preimage.utils import remove_edges
from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
from gklearn.preimage.utils import normalize_distance_matrix
-
-
def test_update_costs():
    """Re-solve the constrained least-squares edit-cost fit from a debug dump.

    Loads ``nb_cost_mat`` and ``dis_k_vec`` from
    ``results/xp_fit_method/fit_data_debug4.gm.npz``, keeps columns 2, 3 and 4
    of ``nb_cost_mat``, and minimises ``||A @ x - dis_k_vec||^2`` with cvxpy
    subject to ``x >= 0`` and ``x[1] == x[2]``.  The solution vector is
    printed.

    Returns:
        None.
    """
    import cvxpy as cp

    # Read only the arrays that are used, and close the archive afterwards.
    with np.load('results/xp_fit_method/fit_data_debug4.gm.npz') as ds:
        nb_cost_mat = ds['nb_cost_mat']
        dis_k_vec = ds['dis_k_vec']

    nb_cost_mat_new = nb_cost_mat[:, [2, 3, 4]]
    nb_costs = nb_cost_mat_new.shape[1]
    x = cp.Variable(nb_costs)
    # `@` is the supported spelling of the matrix-vector product in cvxpy;
    # `*` for matrix multiplication is deprecated there.
    cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
    constraints = [x >= [0.00 for i in range(nb_costs)],
                   np.array([0.0, 1.0, -1.0]).T @ x == 0.0]
    prob = cp.Problem(cp.Minimize(cost_fun), constraints)
    prob.solve()
    print(x.value)
    # NOTE(review): the two values below are computed but never used or
    # returned; presumably they are inspected in an interactive session.
    edit_costs_new = np.concatenate((x.value, np.array([0.0])))
    residual = np.sqrt(prob.value)
-
-
def median_paper_clcpc_python_best():
    """Fit edit costs on each generated monoterpenoides collection (gedlibpy).

    Original author's note: c_vs <= c_vi + c_vr, c_es <= c_ei + c_er.
    NOTE(review): the original docstring also said GED is computed by python
    invoking the C++ code through a bash command, but ``params_ged['lib']``
    is ``'gedlibpy'`` here -- confirm which description is current.

    For every class label in ``y_all`` and every repeat index, the function

    * loads ``monoterpenoides_<y>_<repeat>.xml`` from the ``cpp_ext``
      directory located next to this file,
    * calls ``fit_GED_to_kernel_distance`` on the loaded graphs,
    * stores everything it returned with ``np.savez`` under
      ``results/median_paper/``,
    * appends the fitted edit costs as one line to the edit-cost text file,
    * prints the result of ``pairwise_substitution_consistence`` for the
      kernel-distance and GED matrices.

    Returns:
        None.
    """
    gkernel = 'untilhpathkernel'
    node_label = 'atom'
    edge_label = 'bond_type'
    itr_max = 6
    algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
    params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
                  'algo_options': algo_options, 'stabilizer': None}

    y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
    repeats = 50
    collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
    graph_dir = collection_path + 'gxl/'

    fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt'

    for y in y_all:
        for repeat in range(repeats):
            collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
            Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
            # NOTE(review): seven values are unpacked here while every other
            # call in this file unpacks eight (an extra ``coef_dk``); confirm
            # against the current signature of fit_GED_to_kernel_distance.
            edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
                nb_cost_mat_list = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
                                                              gkernel, itr_max, params_ged=params_ged,
                                                              parallel=True)
            total_time = np.sum(time_list)
            np.savez('results/median_paper/fit_distance.clcpc.python_init40.monot.elabeled.uhpkernel.y'
                     + y + '.repeat' + str(repeat) + '.k10..gm',
                     edit_costs=edit_costs,
                     residual_list=residual_list, edit_cost_list=edit_cost_list,
                     dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
                     total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)

            # Open the log only for the write itself so the handle is always
            # released, even if a later step raises.
            with open(fn_edit_costs_output, 'a') as edit_costs_output_file:
                edit_costs_output_file.write(
                    ''.join(str(ec) + ' ' for ec in edit_costs) + '\n')

            # NOTE(review): nesting reconstructed -- the disabled plotting
            # code that followed used ``y`` and ``repeat`` in its file names,
            # so this check is assumed to run once per collection.
            nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
            print(nb_consistent, nb_inconsistent, ratio_consistent)

            # Disabled in the original: reloading a saved .npz instead of
            # fitting, and exporting eps/png heat maps of the normalized
            # kernel-distance matrix, the normalized GED matrix and their
            # difference for each (y, repeat).
-
-
def median_paper_clcpc_python_bash_cpp():
    """Fit edit costs on each generated monoterpenoides collection (gedlib-bash).

    Original author's note: c_vs <= c_vi + c_vr, c_es <= c_ei + c_er, with GED
    computed by python invoking the C++ code through a bash command (with the
    updated library).

    For every class label in ``y_all`` and every repeat index, the function

    * loads ``monoterpenoides_<y>_<repeat>.xml`` from the ``cpp_ext``
      directory located next to this file,
    * calls ``fit_GED_to_kernel_distance`` on the loaded graphs,
    * stores everything it returned with ``np.savez`` under
      ``results/median_paper/``,
    * appends the fitted edit costs as one line to
      ``results/median_paper/edit_costs_output.txt``,
    * prints the result of ``pairwise_substitution_consistence`` for the
      kernel-distance and GED matrices.

    Returns:
        None.
    """
    gkernel = 'untilhpathkernel'
    node_label = 'atom'
    edge_label = 'bond_type'
    itr_max = 20
    algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
    params_ged = {'lib': 'gedlib-bash', 'cost': 'CONSTANT', 'method': 'IPFP',
                  'algo_options': algo_options}

    y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
    repeats = 50
    collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
    graph_dir = collection_path + 'gxl/'

    fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt'

    for y in y_all:
        for repeat in range(repeats):
            collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
            Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
            edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
                nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label,
                                                                       gkernel, itr_max, params_ged=params_ged,
                                                                       parallel=False)
            total_time = np.sum(time_list)
            np.savez('results/median_paper/fit_distance.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y'
                     + y + '.repeat' + str(repeat) + '.gm',
                     edit_costs=edit_costs,
                     residual_list=residual_list, edit_cost_list=edit_cost_list,
                     dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
                     total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
                     coef_dk=coef_dk)

            # Open the log only for the write itself so the handle is always
            # released, even if a later step raises.
            with open(fn_edit_costs_output, 'a') as edit_costs_output_file:
                edit_costs_output_file.write(
                    ''.join(str(ec) + ' ' for ec in edit_costs) + '\n')

            # NOTE(review): nesting reconstructed -- the disabled plotting
            # code that followed used ``y`` and ``repeat`` in its file names,
            # so this check is assumed to run once per collection.
            nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
            print(nb_consistent, nb_inconsistent, ratio_consistent)

            # Disabled in the original: reloading a saved .npz instead of
            # fitting, and exporting eps/png heat maps of the normalized
            # kernel-distance matrix, the normalized GED matrix and their
            # difference for each (y, repeat).
-
-
-
-
-
def test_cs_leq_ci_plus_cr_python_bash_cpp():
    """Fit edit costs on the monoterpenoides dataset through 'gedlib-bash'.

    Original author's note: c_vs <= c_vi + c_vr, c_es <= c_ei + c_er, with GED
    computed by python invoking the C++ code through a bash command (with the
    updated library).

    The fit results are printed, saved with ``np.savez`` under ``results/``,
    checked with ``pairwise_substitution_consistence``, and the normalized
    kernel-distance matrix, normalized GED matrix and their difference are
    each exported as eps and png images.
    """
    def _export_heatmap(matrix, stem):
        # Draw one matrix, write it as eps and png, then reset the figure.
        plt.imshow(matrix)
        plt.colorbar()
        plt.savefig(stem + '.eps', format='eps', dpi=300)
        plt.savefig(stem + '.png', format='png')
        plt.clf()

    dataset = {'name': 'monoterpenoides',
               'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
    Gn, _ = loadDataset(dataset['dataset'])

    ged_settings = {
        'lib': 'gedlib-bash',
        'cost': 'CONSTANT',
        'method': 'IPFP',
        'algo_options': '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5',
    }
    (edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat,
     time_list, nb_cost_mat_list, coef_dk) = fit_GED_to_kernel_distance(
        Gn, 'atom', 'bond_type', 'untilhpathkernel', 10,
        params_ged=ged_settings, parallel=False)
    total_time = np.sum(time_list)

    for caption, value in (('\nedit_costs:', edit_costs),
                           ('\nresidual_list:', residual_list),
                           ('\nedit_cost_list:', edit_cost_list),
                           ('\ndistance matrix in kernel space:', dis_k_mat),
                           ('\nged matrix:', ged_mat),
                           ('\ntotal time:', total_time)):
        print(caption, value)
    print('\nnb_cost_mat:', nb_cost_mat_list[-1])

    np.savez('results/fit_distance.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel.gm',
             edit_costs=edit_costs,
             residual_list=residual_list, edit_cost_list=edit_cost_list,
             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
             total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
             coef_dk=coef_dk)

    # Disabled in the original: the same experiment on MUTAG, reloading a
    # saved .npz instead of fitting, and dumping pairwise_substitution lists.

    consistence = pairwise_substitution_consistence(dis_k_mat, ged_mat)
    nb_consistent, nb_inconsistent, ratio_consistent = consistence
    print(nb_consistent, nb_inconsistent, ratio_consistent)

    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
    _export_heatmap(
        norm_dis_k_mat,
        'results/norm_dis_k_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel')

    norm_ged_mat = normalize_distance_matrix(ged_mat)
    _export_heatmap(
        norm_ged_mat,
        'results/norm_ged_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel')

    _export_heatmap(
        norm_ged_mat - norm_dis_k_mat,
        'results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel')
-
-
def test_anycosts():
    """Fit edit costs on edge-stripped MUTAG graphs with 'marginalizedkernel'.

    Loads MUTAG, calls ``remove_edges`` on the graphs, runs
    ``fit_GED_to_kernel_distance``, prints and saves the results, and exports
    the normalized kernel-distance matrix, normalized GED matrix and their
    difference as eps images under ``results/``.
    """
    def _export_heatmap(matrix, stem):
        # Draw one matrix, write it as eps, then reset the figure.
        plt.imshow(matrix)
        plt.colorbar()
        plt.savefig(stem + '.eps', format='eps', dpi=300)
        plt.clf()

    dataset = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
               'extra_params': {}}  # node/edge symb
    Gn, _ = loadDataset(dataset['dataset'], extra_params=dataset['extra_params'])
    remove_edges(Gn)

    # NOTE(review): every other call in this file passes node_label and
    # edge_label before the kernel name; this call does not -- confirm it
    # still matches the signature of fit_GED_to_kernel_distance.
    (edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat,
     time_list, nb_cost_mat_list, coef_dk) = fit_GED_to_kernel_distance(
        Gn, 'marginalizedkernel', 10)
    total_time = np.sum(time_list)

    for caption, value in (('\nedit_costs:', edit_costs),
                           ('\nresidual_list:', residual_list),
                           ('\nedit_cost_list:', edit_cost_list),
                           ('\ndistance matrix in kernel space:', dis_k_mat),
                           ('\nged matrix:', ged_mat),
                           ('\ntotal time:', total_time)):
        print(caption, value)
    print('\nnb_cost_mat:', nb_cost_mat_list[-1])

    # coef_dk is not part of this archive in the original either.
    np.savez('results/fit_distance.any_costs.gm', edit_costs=edit_costs,
             residual_list=residual_list, edit_cost_list=edit_cost_list,
             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
             total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)

    # Disabled in the original: reloading a saved .npz instead of fitting,
    # png export, and draw_count_bar on the difference matrix.

    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
    _export_heatmap(norm_dis_k_mat, 'results/norm_dis_k_mat.any_costs')

    norm_ged_mat = normalize_distance_matrix(ged_mat)
    _export_heatmap(norm_ged_mat, 'results/norm_ged_mat.any_costs')

    _export_heatmap(norm_ged_mat - norm_dis_k_mat,
                    'results/diff_mat_norm_ged_dis_k.any_costs')
-
-
def test_cs_leq_ci_plus_cr():
    """Fit edit costs on monoterpenoides with ``fitkernel='gaussian'``.

    Original author's note: c_vs <= c_vi + c_vr, c_es <= c_ei + c_er.

    The fit results are printed, saved with ``np.savez`` under ``results/``,
    checked with ``pairwise_substitution_consistence``, and the normalized
    kernel-distance matrix, normalized GED matrix and their difference are
    each exported as eps and png images.
    """
    def _export_heatmap(matrix, stem):
        # Draw one matrix, write it as eps and png, then reset the figure.
        plt.imshow(matrix)
        plt.colorbar()
        plt.savefig(stem + '.eps', format='eps', dpi=300)
        plt.savefig(stem + '.png', format='png')
        plt.clf()

    dataset = {'name': 'monoterpenoides',
               'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
    Gn, _ = loadDataset(dataset['dataset'])

    (edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat,
     time_list, nb_cost_mat_list, coef_dk) = fit_GED_to_kernel_distance(
        Gn, 'atom', 'bond_type', 'untilhpathkernel', 10,
        fitkernel='gaussian')
    total_time = np.sum(time_list)

    for caption, value in (('\nedit_costs:', edit_costs),
                           ('\nresidual_list:', residual_list),
                           ('\nedit_cost_list:', edit_cost_list),
                           ('\ndistance matrix in kernel space:', dis_k_mat),
                           ('\nged matrix:', ged_mat),
                           ('\ntotal time:', total_time)):
        print(caption, value)
    print('\nnb_cost_mat:', nb_cost_mat_list[-1])

    np.savez('results/fit_distance.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel.gm',
             edit_costs=edit_costs,
             residual_list=residual_list, edit_cost_list=edit_cost_list,
             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
             total_time=total_time, nb_cost_mat_list=nb_cost_mat_list,
             coef_dk=coef_dk)

    # Disabled in the original: the same experiment on MUTAG, reloading a
    # saved .npz instead of fitting, and dumping pairwise_substitution lists.

    consistence = pairwise_substitution_consistence(dis_k_mat, ged_mat)
    nb_consistent, nb_inconsistent, ratio_consistent = consistence
    print(nb_consistent, nb_inconsistent, ratio_consistent)

    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
    _export_heatmap(
        norm_dis_k_mat,
        'results/norm_dis_k_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel')

    norm_ged_mat = normalize_distance_matrix(ged_mat)
    _export_heatmap(
        norm_ged_mat,
        'results/norm_ged_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel')

    _export_heatmap(
        norm_ged_mat - norm_dis_k_mat,
        'results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel')
-
-
def test_unfitted():
    """Compare kernel distances with GEDs computed from fixed edit costs.

    No fitting is done: kernel distances come from ``kernel_distance_matrix``
    and GEDs from ``compute_geds`` with the hard-coded cost vector
    ``[3, 3, 1, 3, 3, 1]``.  Both matrices are printed, checked with
    ``pairwise_substitution_consistence``, normalized and exported as eps and
    png images, and the difference matrix is passed to ``draw_count_bar``.

    NOTE(review): the output file names contain 'MUTAG' although the dataset
    loaded here is monoterpenoides -- confirm which is intended.
    """
    from fitDistance import compute_geds
    from utils import kernel_distance_matrix

    def _export_heatmap(matrix, stem):
        # Draw one matrix, write it as eps and png, then reset the figure.
        plt.imshow(matrix)
        plt.colorbar()
        plt.savefig(stem + '.eps', format='eps', dpi=300)
        plt.savefig(stem + '.png', format='png')
        plt.clf()

    dataset = {'name': 'monoterpenoides',
               'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
    Gn, _ = loadDataset(dataset['dataset'])

    # Disabled in the original: the MUTAG / 'marginalizedkernel' variant.

    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, 'atom', 'bond_type',
                                                gkernel='untilhpathkernel')
    _, ged_mat, _ = compute_geds(Gn, [3, 3, 1, 3, 3, 1],
                                 [0, 1, 2, 3, 4, 5], parallel=True)
    print('\ndistance matrix in kernel space:', dis_k_mat)
    print('\nged matrix:', ged_mat)

    consistence = pairwise_substitution_consistence(dis_k_mat, ged_mat)
    nb_consistent, nb_inconsistent, ratio_consistent = consistence
    print(nb_consistent, nb_inconsistent, ratio_consistent)

    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
    _export_heatmap(norm_dis_k_mat, 'results/norm_dis_k_mat.unfitted.MUTAG')

    norm_ged_mat = normalize_distance_matrix(ged_mat)
    _export_heatmap(norm_ged_mat, 'results/norm_ged_mat.unfitted.MUTAG')

    norm_diff = norm_ged_mat - norm_dis_k_mat
    _export_heatmap(norm_diff, 'results/diff_mat_norm_ged_dis_k.unfitted.MUTAG')
    draw_count_bar(norm_diff)
-
-
def pairwise_substitution_consistence(mat1, mat2, *, verbose=True):
    """Count how often two matrices order pairs of their entries the same way.

    Both matrices are reduced to their upper triangle (diagonal included),
    flattened in the same order.  For every pair of positions ``i <= j`` the
    sign of ``mat1`` entry ``i`` minus entry ``j`` is compared with the sign
    of the same difference in ``mat2``.  Pairs with ``i == j`` are included
    and always count as consistent.

    Args:
        mat1: Square 2-D array; treated as symmetric.
        mat2: Square 2-D array of the same shape; treated as symmetric.
        verbose: When True (default) a tqdm progress bar is written to
            stdout, as before.

    Returns:
        Tuple ``(nb_consistent, nb_inconsistent, ratio_consistent)`` where the
        ratio is ``nb_consistent / (nb_consistent + nb_inconsistent)``.

    Raises:
        ValueError: If the two matrices do not have the same shape.
        ZeroDivisionError: If the matrices are empty.
    """
    mat1 = np.asarray(mat1)
    mat2 = np.asarray(mat2)
    if mat1.shape != mat2.shape:
        raise ValueError('mat1 and mat2 must have the same shape, got '
                         + str(mat1.shape) + ' and ' + str(mat2.shape))

    # Both matrices must be flattened with the SAME index set: triu and tril
    # enumerate entries in different orders, so mixing them would compare
    # unrelated entries even for symmetric matrices.
    upper_tri1 = mat1[np.triu_indices_from(mat1)]
    upper_tri2 = mat2[np.triu_indices_from(mat2)]

    nb_entries = len(upper_tri1)
    rows = range(nb_entries)
    if verbose:
        rows = tqdm(rows, desc='computing consistence', file=sys.stdout)

    nb_consistent = 0
    for i in rows:
        # Compare entry i against entries i..end in one vectorized step.
        same_order = (np.sign(upper_tri1[i] - upper_tri1[i:])
                      == np.sign(upper_tri2[i] - upper_tri2[i:]))
        nb_consistent += int(np.count_nonzero(same_order))

    nb_pairs = nb_entries * (nb_entries + 1) // 2
    nb_inconsistent = nb_pairs - nb_consistent
    return nb_consistent, nb_inconsistent, nb_consistent / nb_pairs
-
-
def pairwise_substitution(mat, *, verbose=True):
    """List the differences between all pairs of upper-triangle entries.

    The upper triangle of ``mat`` (diagonal included) is flattened, and for
    every pair of positions ``i <= j`` the value ``entry[i] - entry[j]`` is
    appended to the result, ordered by ``i`` and then by ``j``.

    Args:
        mat: Square 2-D array; treated as symmetric.
        verbose: When True (default) a tqdm progress bar is written to
            stdout, as before.

    Returns:
        A list of the pairwise differences (numpy scalars).
    """
    # the matrix is considered symmetric.
    upper_tri = mat[np.triu_indices_from(mat)]
    rows = range(len(upper_tri))
    if verbose:
        rows = tqdm(rows, desc='computing', file=sys.stdout)

    sub_list = []
    for i in rows:
        # One vectorized subtraction replaces the inner Python loop.
        sub_list.extend(upper_tri[i] - upper_tri[i:])
    return sub_list
-
-
def draw_count_bar(norm_diff):
    """Draw a bar chart of how many entries fall into each 0.1-wide bin.

    Every entry of ``norm_diff`` is multiplied by 10 and floored; the number
    of entries per resulting value is counted, sorted by value, and plotted
    as a pandas bar chart.
    """
    import pandas
    from collections import Counter, OrderedDict

    bin_ids = np.floor(norm_diff.flatten() * 10)
    counts_by_bin = OrderedDict(sorted(Counter(bin_ids).items()))
    pandas.DataFrame.from_dict(counts_by_bin, orient='index').plot(kind='bar')
-
-
def _run_selected_experiment():
    """Run the single experiment currently enabled for script execution."""
    # Disabled alternatives in the original: test_anycosts,
    # test_cs_leq_ci_plus_cr, test_unfitted,
    # test_cs_leq_ci_plus_cr_python_bash_cpp,
    # median_paper_clcpc_python_bash_cpp, median_paper_clcpc_python_best,
    # and a pairwise_substitution call on a 3x3 example array.
    test_update_costs()


if __name__ == '__main__':
    _run_selected_experiment()
|