@@ -19,7 +19,6 @@ import cvxpy as cp | |||||
import sys | import sys | ||||
sys.path.insert(0, "../") | sys.path.insert(0, "../") | ||||
from pygraph.utils.graphfiles import loadDataset | |||||
from ged import GED, get_nb_edit_operations | from ged import GED, get_nb_edit_operations | ||||
from utils import kernel_distance_matrix | from utils import kernel_distance_matrix | ||||
@@ -43,9 +42,11 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): | |||||
residual_list = [] | residual_list = [] | ||||
edit_cost_list = [] | edit_cost_list = [] | ||||
time_list = [] | |||||
for itr in range(itr_max): | for itr in range(itr_max): | ||||
print('\niteration', itr) | print('\niteration', itr) | ||||
time0 = time.time() | |||||
# compute GEDs and numbers of edit operations. | # compute GEDs and numbers of edit operations. | ||||
edit_cost_constant = [i for i in edit_costs] | edit_cost_constant = [i for i in edit_costs] | ||||
edit_cost_list.append(edit_cost_constant) | edit_cost_list.append(edit_cost_constant) | ||||
@@ -71,11 +72,20 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): | |||||
for idx, item in enumerate(idx_nonzeros): | for idx, item in enumerate(idx_nonzeros): | ||||
edit_costs[item] = edit_costs_new[idx] | edit_costs[item] = edit_costs_new[idx] | ||||
time_list.append(time.time() - time0) | |||||
print('edit_costs:', edit_costs) | print('edit_costs:', edit_costs) | ||||
print('residual_list:', residual_list) | print('residual_list:', residual_list) | ||||
edit_cost_list.append(edit_costs) | |||||
ged_all, ged_mat, n_edit_operations = compute_geds(Gn, edit_costs, | |||||
idx_nonzeros, parallel=True) | |||||
residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec))) | |||||
residual_list.append(residual) | |||||
return edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat | |||||
return edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list | |||||
def compute_geds(Gn, edit_cost_constant, idx_nonzeros, parallel=False): | def compute_geds(Gn, edit_cost_constant, idx_nonzeros, parallel=False): | ||||
@@ -166,33 +176,33 @@ def compute_better_costs(nb_cost_mat, dis_k_vec): | |||||
# edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec) | # edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec) | ||||
# method 3: solve as a quadratic program with constraints: x_i >= 0, sum(x) = 1. | # method 3: solve as a quadratic program with constraints: x_i >= 0, sum(x) = 1. | ||||
P = np.dot(nb_cost_mat.T, nb_cost_mat) | |||||
q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat) | |||||
G = -1 * np.identity(nb_cost_mat.shape[1]) | |||||
h = np.array([0 for i in range(nb_cost_mat.shape[1])]) | |||||
A = np.array([1 for i in range(nb_cost_mat.shape[1])]) | |||||
b = 1 | |||||
# P = np.dot(nb_cost_mat.T, nb_cost_mat) | |||||
# q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat) | |||||
# G = -1 * np.identity(nb_cost_mat.shape[1]) | |||||
# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) | |||||
# A = np.array([1 for i in range(nb_cost_mat.shape[1])]) | |||||
# b = 1 | |||||
# x = cp.Variable(nb_cost_mat.shape[1]) | |||||
# prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x), | |||||
# [G@x <= h]) | |||||
# prob.solve() | |||||
# edit_costs_new = x.value | |||||
# residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec) | |||||
# G = -1 * np.identity(nb_cost_mat.shape[1]) | |||||
# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) | |||||
x = cp.Variable(nb_cost_mat.shape[1]) | x = cp.Variable(nb_cost_mat.shape[1]) | ||||
prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x), | |||||
[G@x <= h]) | |||||
cost = cp.sum_squares(nb_cost_mat * x - dis_k_vec) | |||||
constraints = [x >= [0 for i in range(nb_cost_mat.shape[1])]] | |||||
prob = cp.Problem(cp.Minimize(cost), constraints) | |||||
prob.solve() | prob.solve() | ||||
edit_costs_new = x.value | edit_costs_new = x.value | ||||
residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec) | |||||
residual = np.sqrt(prob.value) | |||||
# method 4: | |||||
return edit_costs_new, residual | return edit_costs_new, residual | ||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
from utils import remove_edges | |||||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
'extra_params': {}} # node/edge symb | |||||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
# Gn = Gn[0:10] | |||||
remove_edges(Gn) | |||||
gkernel = 'marginalizedkernel' | |||||
itr_max = 10 | |||||
time0 = time.time() | |||||
edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat = \ | |||||
fit_GED_to_kernel_distance(Gn, gkernel, itr_max) | |||||
total_time = time.time() - time0 | |||||
print('total time:', total_time) | |||||
print('check test_fitDistance.py') |
@@ -0,0 +1,114 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Thu Oct 24 11:50:56 2019 | |||||
@author: ljia | |||||
""" | |||||
from matplotlib import pyplot as plt | |||||
import numpy as np | |||||
from pygraph.utils.graphfiles import loadDataset | |||||
from utils import remove_edges | |||||
from fitDistance import fit_GED_to_kernel_distance | |||||
from utils import normalize_distance_matrix | |||||
def _run_fit_experiment(tag):
    """Fit edit costs on MUTAG and store the results under ``results/``.

    Loads the MUTAG dataset, calls ``fit_GED_to_kernel_distance`` with the
    marginalized kernel for 10 iterations, prints everything it returns,
    saves the values to ``results/fit_distance.<tag>.gm.npz`` and writes one
    EPS heat map per normalized distance matrix (kernel space and GED).

    Parameters
    ----------
    tag : str
        Experiment label inserted into every output file name.
    """
    import os

    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    # Gn = Gn[0:10]  # uncomment to experiment on a small subset.
    remove_edges(Gn)  # presumably mutates Gn in place; return value unused.

    gkernel = 'marginalizedkernel'
    itr_max = 10
    edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list = \
        fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
    # time_list holds one duration per iteration of the fitting loop.
    total_time = np.sum(time_list)

    print('\nedit_costs:', edit_costs)
    print('\nresidual_list:', residual_list)
    print('\nedit_cost_list:', edit_cost_list)
    print('\ndistance matrix in kernel space:', dis_k_mat)
    print('\nged matrix:', ged_mat)
    print('total time:', total_time)

    # np.savez and plt.savefig do not create missing directories.
    os.makedirs('results', exist_ok=True)

    # np.savez appends '.npz'; reload the values later with
    # np.load('results/fit_distance.<tag>.gm.npz').
    np.savez('results/fit_distance.' + tag + '.gm', edit_costs=edit_costs,
             residual_list=residual_list, edit_cost_list=edit_cost_list,
             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
             total_time=total_time)

    # normalized distance matrices.
    for prefix, matrix in (('norm_dis_k_mat', dis_k_mat),
                           ('norm_ged_mat', ged_mat)):
        plt.imshow(normalize_distance_matrix(matrix))
        plt.colorbar()
        plt.savefig('results/' + prefix + '.' + tag + '.eps', format='eps',
                    dpi=300)
        # Clear the figure so the next heat map starts from an empty canvas.
        plt.clf()


def test_anycosts():
    """Run the fitting experiment and save results with tag ``any_costs``."""
    _run_fit_experiment('any_costs')


def test_cs_leq_ci_plus_cr():
    """c_vs <= c_vi + c_vr, c_es <= c_ei + c_er

    NOTE(review): the call made here is identical to ``test_anycosts``; no
    constraint is passed to ``fit_GED_to_kernel_distance`` from this module.
    Confirm that the constraint named above is enforced inside the fitting
    code, otherwise both experiments produce the same fit.
    """
    _run_fit_experiment('cs_leq_ci_plus_cr')
if __name__ == '__main__':
    # Executed as a script: run both fitting experiments, in this order.
    for experiment in (test_anycosts, test_cs_leq_ci_plus_cr):
        experiment()
@@ -106,4 +106,10 @@ def get_same_item_indices(ls): | |||||
idx_dict[item].append(idx) | idx_dict[item].append(idx) | ||||
else: | else: | ||||
idx_dict[item] = [idx] | idx_dict[item] = [idx] | ||||
return idx_dict | |||||
return idx_dict | |||||
def normalize_distance_matrix(D):
    """Rescale the entries of ``D`` linearly so that they span [0, 1].

    The smallest entry maps to 0 and the largest to 1 (min-max scaling over
    all entries at once, not per row or column).

    Parameters
    ----------
    D : array_like
        Matrix of values to rescale.

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``D``. When all entries of ``D`` are equal
        the result is all zeros, and an empty input yields an empty float
        array, instead of dividing by zero or raising.
    """
    D = np.asarray(D)
    if D.size == 0:
        # np.amax / np.amin raise on empty input; nothing to rescale.
        return D.astype(float)
    max_value = np.amax(D)
    min_value = np.amin(D)
    value_range = max_value - min_value
    if value_range == 0:
        # Constant matrix: avoid 0 / 0, which would fill the result with NaN.
        return np.zeros_like(D, dtype=float)
    return (D - min_value) / value_range