From e3985c5481a7a16bdfd1308351e1695e152df4e5 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Thu, 24 Oct 2019 12:00:17 +0200 Subject: [PATCH] fitDistance.py: print and save results, update codes of quadratic program. This is the last version with edit costs having no other constraints than being no smaller than 0. --- preimage/fitDistance.py | 58 +++++++++++++--------- preimage/test_fitDistance.py | 114 +++++++++++++++++++++++++++++++++++++++++++ preimage/utils.py | 8 ++- 3 files changed, 155 insertions(+), 25 deletions(-) create mode 100644 preimage/test_fitDistance.py diff --git a/preimage/fitDistance.py b/preimage/fitDistance.py index 42b9889..81ef711 100644 --- a/preimage/fitDistance.py +++ b/preimage/fitDistance.py @@ -19,7 +19,6 @@ import cvxpy as cp import sys sys.path.insert(0, "../") -from pygraph.utils.graphfiles import loadDataset from ged import GED, get_nb_edit_operations from utils import kernel_distance_matrix @@ -43,9 +42,11 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): residual_list = [] edit_cost_list = [] + time_list = [] for itr in range(itr_max): print('\niteration', itr) + time0 = time.time() # compute GEDs and numbers of edit operations. 
edit_cost_constant = [i for i in edit_costs] edit_cost_list.append(edit_cost_constant) @@ -71,11 +72,20 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): for idx, item in enumerate(idx_nonzeros): edit_costs[item] = edit_costs_new[idx] + + time_list.append(time.time() - time0) print('edit_costs:', edit_costs) print('residual_list:', residual_list) + + + edit_cost_list.append(edit_costs) + ged_all, ged_mat, n_edit_operations = compute_geds(Gn, edit_costs, + idx_nonzeros, parallel=True) + residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec))) + residual_list.append(residual) - return edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat + return edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list def compute_geds(Gn, edit_cost_constant, idx_nonzeros, parallel=False): @@ -166,33 +176,33 @@ def compute_better_costs(nb_cost_mat, dis_k_vec): # edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec) # method 3: solve as a quadratic program with constraints: x_i >= 0, sum(x) = 1. 
- P = np.dot(nb_cost_mat.T, nb_cost_mat) - q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat) - G = -1 * np.identity(nb_cost_mat.shape[1]) - h = np.array([0 for i in range(nb_cost_mat.shape[1])]) - A = np.array([1 for i in range(nb_cost_mat.shape[1])]) - b = 1 +# P = np.dot(nb_cost_mat.T, nb_cost_mat) +# q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat) +# G = -1 * np.identity(nb_cost_mat.shape[1]) +# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) +# A = np.array([1 for i in range(nb_cost_mat.shape[1])]) +# b = 1 +# x = cp.Variable(nb_cost_mat.shape[1]) +# prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x), +# [G@x <= h]) +# prob.solve() +# edit_costs_new = x.value +# residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec) + +# G = -1 * np.identity(nb_cost_mat.shape[1]) +# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) x = cp.Variable(nb_cost_mat.shape[1]) - prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x), - [G@x <= h]) + cost = cp.sum_squares(nb_cost_mat * x - dis_k_vec) + constraints = [x >= [0 for i in range(nb_cost_mat.shape[1])]] + prob = cp.Problem(cp.Minimize(cost), constraints) prob.solve() edit_costs_new = x.value - residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec) + residual = np.sqrt(prob.value) + + # method 4: return edit_costs_new, residual if __name__ == '__main__': - from utils import remove_edges - ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', - 'extra_params': {}} # node/edge symb - Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) -# Gn = Gn[0:10] - remove_edges(Gn) - gkernel = 'marginalizedkernel' - itr_max = 10 - time0 = time.time() - edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat = \ - fit_GED_to_kernel_distance(Gn, gkernel, itr_max) - total_time = time.time() - time0 - print('total time:', total_time) \ No newline at end of file + print('check test_fitDistance.py') \ No newline at end of file diff --git a/preimage/test_fitDistance.py 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Experiments that fit GED edit costs to kernel distances on MUTAG.

Each experiment runs ``fit_GED_to_kernel_distance`` on the MUTAG dataset,
prints the results, stores them in an ``.npz`` archive and saves heat maps
of the normalized kernel-distance and GED matrices.

Created on Thu Oct 24 11:50:56 2019

@author: ljia
"""
import os

from matplotlib import pyplot as plt
import numpy as np

from pygraph.utils.graphfiles import loadDataset
from utils import remove_edges
from fitDistance import fit_GED_to_kernel_distance
from utils import normalize_distance_matrix

# Directory that receives every archive and figure written by this module.
RESULTS_DIR = 'results'


def _save_heatmap(dist_mat, file_stem):
    """Normalize ``dist_mat`` and save it as ``<RESULTS_DIR>/<file_stem>.eps``."""
    plt.imshow(normalize_distance_matrix(dist_mat))
    plt.colorbar()
    plt.savefig(os.path.join(RESULTS_DIR, file_stem + '.eps'), format='eps',
                dpi=300)
    # Clear the current figure so the next heat map starts from a blank canvas.
    plt.clf()


def _run_experiment(tag):
    """Fit edit costs on MUTAG, then print and save everything under ``tag``.

    Parameters
    ----------
    tag : str
        Label inserted into every output file name, e.g. ``'any_costs'``
        gives ``results/fit_distance.any_costs.gm.npz``.
    """
    # Create the output directory up front: a missing directory would
    # otherwise only be noticed after the expensive fitting has finished.
    os.makedirs(RESULTS_DIR, exist_ok=True)

    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    # The second return value (named y_all originally) is not needed here.
    Gn, _ = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    itr_max = 10
    edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list = \
        fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
    total_time = np.sum(time_list)

    print('\nedit_costs:', edit_costs)
    print('\nresidual_list:', residual_list)
    print('\nedit_cost_list:', edit_cost_list)
    print('\ndistance matrix in kernel space:', dis_k_mat)
    print('\nged matrix:', ged_mat)
    print('total time:', total_time)

    # np.savez appends '.npz', so the archive ends in '.gm.npz'.
    np.savez(os.path.join(RESULTS_DIR, 'fit_distance.' + tag + '.gm'),
             edit_costs=edit_costs, residual_list=residual_list,
             edit_cost_list=edit_cost_list, dis_k_mat=dis_k_mat,
             ged_mat=ged_mat, time_list=time_list, total_time=total_time)

    # Heat maps of the normalized distance matrices.
    _save_heatmap(dis_k_mat, 'norm_dis_k_mat.' + tag)
    _save_heatmap(ged_mat, 'norm_ged_mat.' + tag)


def test_anycosts():
    """Run the fit and save the results under the tag ``any_costs``."""
    _run_experiment('any_costs')


def test_cs_leq_ci_plus_cr():
    """c_vs <= c_vi + c_vr, c_es <= c_ei + c_er

    NOTE(review): this makes exactly the same call to
    ``fit_GED_to_kernel_distance`` as ``test_anycosts``; nothing here passes
    the constraints named above to the solver. Only the output file tag
    differs. Confirm whether the constraints are meant to be enabled
    elsewhere.
    """
    _run_experiment('cs_leq_ci_plus_cr')


if __name__ == '__main__':
    test_anycosts()
    test_cs_leq_ci_plus_cr()
def normalize_distance_matrix(D):
    """Rescale the entries of ``D`` linearly so they span [0, 1].

    The smallest entry maps to 0 and the largest to 1 (min-max scaling).

    Parameters
    ----------
    D : array_like
        Matrix (or any array-like) of numeric values.

    Returns
    -------
    numpy.ndarray
        A new float array with the same shape as ``D``. If all entries of
        ``D`` are equal, an all-zero array is returned; if ``D`` is empty,
        an empty array is returned.
    """
    D = np.asarray(D, dtype=float)
    if D.size == 0:
        # np.amin / np.amax raise on empty input; there is nothing to scale.
        return D.copy()
    min_value = np.amin(D)
    value_range = np.amax(D) - min_value
    if value_range == 0:
        # A constant matrix would otherwise divide by zero and become NaN.
        return np.zeros_like(D)
    return (D - min_value) / value_range