|
|
@@ -8,6 +8,9 @@ Created on Wed Oct 16 14:20:06 2019 |
|
|
|
import numpy as np |
|
|
|
from tqdm import tqdm |
|
|
|
|
|
|
|
from scipy import optimize |
|
|
|
import cvxpy as cp |
|
|
|
|
|
|
|
import sys |
|
|
|
sys.path.insert(0, "../") |
|
|
|
from pygraph.utils.graphfiles import loadDataset |
|
|
@@ -15,12 +18,9 @@ from ged import GED, get_nb_edit_operations |
|
|
|
from utils import kernel_distance_matrix |
|
|
|
|
|
|
|
def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): |
|
|
|
c_vi = 1 |
|
|
|
c_vr = 1 |
|
|
|
c_vs = 1 |
|
|
|
c_ei = 1 |
|
|
|
c_er = 1 |
|
|
|
c_es = 1 |
|
|
|
# c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them. |
|
|
|
edit_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0] |
|
|
|
idx_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0] |
|
|
|
|
|
|
|
# compute distances in feature space. |
|
|
|
dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel) |
|
|
@@ -36,14 +36,9 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): |
|
|
|
for itr in range(itr_max): |
|
|
|
print('iteration', itr) |
|
|
|
ged_all = [] |
|
|
|
n_vi_all = [] |
|
|
|
n_vr_all = [] |
|
|
|
n_vs_all = [] |
|
|
|
n_ei_all = [] |
|
|
|
n_er_all = [] |
|
|
|
n_es_all = [] |
|
|
|
n_edit_operations = [[] for i in range(len(idx_nonzeros))] |
|
|
|
# compute GEDs and numbers of edit operations. |
|
|
|
edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] |
|
|
|
edit_cost_constant = [i for i in edit_costs] |
|
|
|
edit_cost_list.append(edit_cost_constant) |
|
|
|
for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout): |
|
|
|
# for i in range(len(Gn)): |
|
|
@@ -53,41 +48,58 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): |
|
|
|
edit_cost_constant=edit_cost_constant, stabilizer='min', |
|
|
|
repeat=30) |
|
|
|
ged_all.append(dis) |
|
|
|
n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(Gn[i], |
|
|
|
n_eo_tmp = get_nb_edit_operations(Gn[i], |
|
|
|
Gn[j], pi_forward, pi_backward) |
|
|
|
n_vi_all.append(n_vi) |
|
|
|
n_vr_all.append(n_vr) |
|
|
|
n_vs_all.append(n_vs) |
|
|
|
n_ei_all.append(n_ei) |
|
|
|
n_er_all.append(n_er) |
|
|
|
n_es_all.append(n_es) |
|
|
|
for idx, item in enumerate(idx_nonzeros): |
|
|
|
n_edit_operations[idx].append(n_eo_tmp[item]) |
|
|
|
|
|
|
|
residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec))) |
|
|
|
residual_list.append(residual) |
|
|
|
|
|
|
|
# "fit" geds to distances in feature space by tuning edit costs using the |
|
|
|
# Least Squares Method. |
|
|
|
nb_cost_mat = np.column_stack((np.array(n_vi_all), np.array(n_vr_all), |
|
|
|
np.array(n_vs_all), np.array(n_ei_all), |
|
|
|
np.array(n_er_all), np.array(n_es_all))) |
|
|
|
edit_costs, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, |
|
|
|
rcond=None) |
|
|
|
for i in range(len(edit_costs)): |
|
|
|
if edit_costs[i] < 0: |
|
|
|
if edit_costs[i] > -1e-3: |
|
|
|
edit_costs[i] = 0 |
|
|
|
# else: |
|
|
|
# raise ValueError('The edit cost is negative.') |
|
|
|
|
|
|
|
c_vi = edit_costs[0] |
|
|
|
c_vr = edit_costs[1] |
|
|
|
c_vs = edit_costs[2] |
|
|
|
c_ei = edit_costs[3] |
|
|
|
c_er = edit_costs[4] |
|
|
|
c_es = edit_costs[5] |
|
|
|
nb_cost_mat = np.array(n_edit_operations).T |
|
|
|
edit_costs_new, residual = get_better_costs(nb_cost_mat, dis_k_vec) |
|
|
|
|
|
|
|
print(residual) |
|
|
|
for i in range(len(edit_costs_new)): |
|
|
|
if edit_costs_new[i] < 0: |
|
|
|
if edit_costs_new[i] > -1e-6: |
|
|
|
edit_costs_new[i] = 0 |
|
|
|
else: |
|
|
|
raise ValueError('The edit cost is negative.') |
|
|
|
|
|
|
|
for idx, item in enumerate(idx_nonzeros): |
|
|
|
edit_costs[item] = edit_costs_new[idx] |
|
|
|
|
|
|
|
return c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list |
|
|
|
return edit_costs, residual_list, edit_cost_list |
|
|
|
|
|
|
|
|
|
|
|
def get_better_costs(nb_cost_mat, dis_k_vec): |
|
|
|
# # method 1: simple least square method. |
|
|
|
# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, |
|
|
|
# rcond=None) |
|
|
|
|
|
|
|
# # method 2: least square method with x_i >= 0. |
|
|
|
# edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec) |
|
|
|
|
|
|
|
# method 3: solve as a quadratic program with constraints: x_i >= 0, sum(x) = 1. |
|
|
|
P = np.dot(nb_cost_mat.T, nb_cost_mat) |
|
|
|
q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat) |
|
|
|
G = -1 * np.identity(nb_cost_mat.shape[1]) |
|
|
|
h = np.array([0 for i in range(nb_cost_mat.shape[1])]) |
|
|
|
A = np.array([1 for i in range(nb_cost_mat.shape[1])]) |
|
|
|
b = 1 |
|
|
|
x = cp.Variable(nb_cost_mat.shape[1]) |
|
|
|
prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x), |
|
|
|
[G@x <= h, |
|
|
|
A@x == b]) |
|
|
|
prob.solve() |
|
|
|
edit_costs_new = x.value |
|
|
|
residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec) |
|
|
|
|
|
|
|
# p = program(minimize(norm2(nb_cost_mat*x-dis_k_vec)),[equals(sum(x),1),geq(x,0)]) |
|
|
|
return edit_costs_new, residual |
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
@@ -95,9 +107,9 @@ if __name__ == '__main__': |
|
|
|
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', |
|
|
|
'extra_params': {}} # node/edge symb |
|
|
|
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) |
|
|
|
Gn = Gn[0:10] |
|
|
|
# Gn = Gn[0:10] |
|
|
|
remove_edges(Gn) |
|
|
|
gkernel = 'marginalizedkernel' |
|
|
|
itr_max = 10 |
|
|
|
c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list = \ |
|
|
|
edit_costs, residual_list, edit_cost_list = \ |
|
|
|
fit_GED_to_kernel_distance(Gn, gkernel, itr_max) |