Browse Source

fitDistance.py: print and save results; update the quadratic-program code. This is the last version in which the edit costs have no constraints other than being non-negative (>= 0).

v0.1
jajupmochi 5 years ago
parent
commit
e3985c5481
3 changed files with 155 additions and 25 deletions
  1. +34
    -24
      preimage/fitDistance.py
  2. +114
    -0
      preimage/test_fitDistance.py
  3. +7
    -1
      preimage/utils.py

+ 34
- 24
preimage/fitDistance.py View File

@@ -19,7 +19,6 @@ import cvxpy as cp

import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from ged import GED, get_nb_edit_operations
from utils import kernel_distance_matrix

@@ -43,9 +42,11 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
residual_list = []
edit_cost_list = []
time_list = []
for itr in range(itr_max):
print('\niteration', itr)
time0 = time.time()
# compute GEDs and numbers of edit operations.
edit_cost_constant = [i for i in edit_costs]
edit_cost_list.append(edit_cost_constant)
@@ -71,11 +72,20 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
for idx, item in enumerate(idx_nonzeros):
edit_costs[item] = edit_costs_new[idx]
time_list.append(time.time() - time0)
print('edit_costs:', edit_costs)
print('residual_list:', residual_list)
edit_cost_list.append(edit_costs)
ged_all, ged_mat, n_edit_operations = compute_geds(Gn, edit_costs,
idx_nonzeros, parallel=True)
residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec)))
residual_list.append(residual)
return edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat
return edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list


def compute_geds(Gn, edit_cost_constant, idx_nonzeros, parallel=False):
@@ -166,33 +176,33 @@ def compute_better_costs(nb_cost_mat, dis_k_vec):
# edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec)
# method 3: solve as a quadratic program with constraints: x_i >= 0, sum(x) = 1.
P = np.dot(nb_cost_mat.T, nb_cost_mat)
q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)
G = -1 * np.identity(nb_cost_mat.shape[1])
h = np.array([0 for i in range(nb_cost_mat.shape[1])])
A = np.array([1 for i in range(nb_cost_mat.shape[1])])
b = 1
# P = np.dot(nb_cost_mat.T, nb_cost_mat)
# q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)
# G = -1 * np.identity(nb_cost_mat.shape[1])
# h = np.array([0 for i in range(nb_cost_mat.shape[1])])
# A = np.array([1 for i in range(nb_cost_mat.shape[1])])
# b = 1
# x = cp.Variable(nb_cost_mat.shape[1])
# prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
# [G@x <= h])
# prob.solve()
# edit_costs_new = x.value
# residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
# G = -1 * np.identity(nb_cost_mat.shape[1])
# h = np.array([0 for i in range(nb_cost_mat.shape[1])])
x = cp.Variable(nb_cost_mat.shape[1])
prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
[G@x <= h])
cost = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
constraints = [x >= [0 for i in range(nb_cost_mat.shape[1])]]
prob = cp.Problem(cp.Minimize(cost), constraints)
prob.solve()
edit_costs_new = x.value
residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
residual = np.sqrt(prob.value)
# method 4:
return edit_costs_new, residual


if __name__ == '__main__':
from utils import remove_edges
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:10]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
itr_max = 10
time0 = time.time()
edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat = \
fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
total_time = time.time() - time0
print('total time:', total_time)
print('check test_fitDistance.py')

+ 114
- 0
preimage/test_fitDistance.py View File

@@ -0,0 +1,114 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 24 11:50:56 2019

@author: ljia
"""
from matplotlib import pyplot as plt
import numpy as np

from pygraph.utils.graphfiles import loadDataset
from utils import remove_edges
from fitDistance import fit_GED_to_kernel_distance
from utils import normalize_distance_matrix

def _save_heatmap(matrix, path):
    """Draw *matrix* as an image with a colour bar and save it as an EPS file.

    The current matplotlib figure is cleared afterwards so that the next
    plot does not get drawn on top of this one.
    """
    plt.imshow(matrix)
    plt.colorbar()
    plt.savefig(path, format='eps', dpi=300)
    plt.clf()


def _fit_and_save(tag):
    """Fit edit costs on MUTAG, print the results and store them on disk.

    Loads the MUTAG dataset, strips the edges from its graphs, runs
    ``fit_GED_to_kernel_distance`` with the marginalized kernel for 10
    iterations, prints every returned quantity, and then writes:

    * ``results/fit_distance.<tag>.gm.npz`` -- all returned quantities plus
      the summed per-iteration time;
    * ``results/norm_dis_k_mat.<tag>.eps`` -- min-max normalized kernel
      distance matrix;
    * ``results/norm_ged_mat.<tag>.eps`` -- min-max normalized GED matrix.

    Parameters
    ----------
    tag : str
        Experiment name inserted into every output file name.
    """
    # Local import: keeps this block self-contained without touching the
    # file's import section.
    import os

    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    # The second value returned by loadDataset is not needed here.
    Gn, _ = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    # Gn = Gn[0:10]  # uncomment to try the pipeline on a small subset.
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    itr_max = 10
    (edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat,
     time_list) = fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
    total_time = np.sum(time_list)

    print('\nedit_costs:', edit_costs)
    print('\nresidual_list:', residual_list)
    print('\nedit_cost_list:', edit_cost_list)
    print('\ndistance matrix in kernel space:', dis_k_mat)
    print('\nged matrix:', ged_mat)
    print('total time:', total_time)

    # Make sure the output directory exists *before* saving, so that the
    # results of a long fit are not lost to a missing folder.
    os.makedirs('results', exist_ok=True)
    # np.savez appends '.npz'; the archive can later be reloaded with
    # np.load('results/fit_distance.<tag>.gm.npz') to re-plot without
    # recomputing.
    np.savez('results/fit_distance.' + tag + '.gm', edit_costs=edit_costs,
             residual_list=residual_list, edit_cost_list=edit_cost_list,
             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list,
             total_time=total_time)

    # normalized distance matrices.
    _save_heatmap(normalize_distance_matrix(dis_k_mat),
                  'results/norm_dis_k_mat.' + tag + '.eps')
    _save_heatmap(normalize_distance_matrix(ged_mat),
                  'results/norm_ged_mat.' + tag + '.eps')


def test_anycosts():
    """Run the fitting experiment and save it under the ``any_costs`` tag."""
    _fit_and_save('any_costs')


def test_cs_leq_ci_plus_cr():
    """c_vs <= c_vi + c_vr, c_es <= c_ei + c_er

    Run the fitting experiment and save it under the ``cs_leq_ci_plus_cr``
    tag.

    NOTE(review): no constraint is passed from here; the call to
    ``fit_GED_to_kernel_distance`` is the same as in ``test_anycosts``.
    The inequalities above presumably have to be enforced inside the
    fitting code itself -- confirm before comparing the two result sets.
    """
    _fit_and_save('cs_leq_ci_plus_cr')
if __name__ == '__main__':
    # Script entry point: run the two experiments one after the other,
    # in the same order as before.
    for experiment in (test_anycosts, test_cs_leq_ci_plus_cr):
        experiment()

+ 7
- 1
preimage/utils.py View File

@@ -106,4 +106,10 @@ def get_same_item_indices(ls):
idx_dict[item].append(idx)
else:
idx_dict[item] = [idx]
return idx_dict
return idx_dict


def normalize_distance_matrix(D):
    """Min-max normalize a distance matrix to the range [0, 1].

    Every entry is mapped to ``(d - min(D)) / (max(D) - min(D))``.

    Parameters
    ----------
    D : array_like
        Distance matrix (any shape). Nested lists are accepted and
        converted to a NumPy array.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``D``. If all entries of ``D`` are equal
        (so the value range is zero) an all-zero float array is returned
        instead of dividing by zero.
    """
    D = np.asarray(D)
    min_value = np.amin(D)
    value_range = np.amax(D) - min_value
    if value_range == 0:
        # Constant matrix: the plain formula would be 0 / 0 and yield NaN.
        return np.zeros_like(D, dtype=float)
    return (D - min_value) / value_range

Loading…
Cancel
Save