@@ -7,6 +7,12 @@ Created on Wed Oct 16 14:20:06 2019
"""
"""
import numpy as np
import numpy as np
from tqdm import tqdm
from tqdm import tqdm
from itertools import combinations_with_replacement
import multiprocessing
from multiprocessing import Pool
from functools import partial
import time
import random
from scipy import optimize
from scipy import optimize
import cvxpy as cp
import cvxpy as cp
@@ -19,7 +25,12 @@ from utils import kernel_distance_matrix
def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
# c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
# c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
edit_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
random.seed(1)
cost_rdm = random.sample(range(1, 10), 5)
edit_costs = cost_rdm + [0]
# edit_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
# edit_costs = [0, 0, 0.9544, 0.026, 0.0196, 0]
# edit_costs = [0.008429912251810438, 0.025461055985319694, 0.2047320869225948, 0.004148727085832133, 0.0, 0]
idx_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0]
idx_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0]
# compute distances in feature space.
# compute distances in feature space.
@@ -34,24 +45,13 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
edit_cost_list = []
edit_cost_list = []
for itr in range(itr_max):
for itr in range(itr_max):
print('iteration', itr)
ged_all = []
n_edit_operations = [[] for i in range(len(idx_nonzeros))]
print('\niteration', itr)
# compute GEDs and numbers of edit operations.
# compute GEDs and numbers of edit operations.
edit_cost_constant = [i for i in edit_costs]
edit_cost_constant = [i for i in edit_costs]
edit_cost_list.append(edit_cost_constant)
edit_cost_list.append(edit_cost_constant)
for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
# for i in range(len(Gn)):
for j in range(i, len(Gn)):
dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], lib='gedlibpy',
cost='CONSTANT', method='IPFP',
edit_cost_constant=edit_cost_constant, stabilizer='min',
repeat=30)
ged_all.append(dis)
n_eo_tmp = get_nb_edit_operations(Gn[i],
Gn[j], pi_forward, pi_backward)
for idx, item in enumerate(idx_nonzeros):
n_edit_operations[idx].append(n_eo_tmp[item])
ged_all, ged_mat, n_edit_operations = compute_geds(Gn, edit_cost_constant,
idx_nonzeros, parallel=True)
residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec)))
residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec)))
residual_list.append(residual)
residual_list.append(residual)
@@ -59,23 +59,105 @@ def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
# "fit" geds to distances in feature space by tuning edit costs using the
# "fit" geds to distances in feature space by tuning edit costs using the
# Least Squares Method.
# Least Squares Method.
nb_cost_mat = np.array(n_edit_operations).T
nb_cost_mat = np.array(n_edit_operations).T
edit_costs_new, residual = get _better_costs(nb_cost_mat, dis_k_vec)
edit_costs_new, residual = compute _better_costs(nb_cost_mat, dis_k_vec)
print(residual)
print('pseudo residual:', residual)
for i in range(len(edit_costs_new)):
for i in range(len(edit_costs_new)):
if edit_costs_new[i] < 0:
if edit_costs_new[i] < 0:
if edit_costs_new[i] > -1e-6 :
if edit_costs_new[i] > -1e-9 :
edit_costs_new[i] = 0
edit_costs_new[i] = 0
else:
else:
raise ValueError('The edit cost is negative.')
raise ValueError('The edit cost is negative.')
for idx, item in enumerate(idx_nonzeros):
for idx, item in enumerate(idx_nonzeros):
edit_costs[item] = edit_costs_new[idx]
edit_costs[item] = edit_costs_new[idx]
print('edit_costs:', edit_costs)
print('residual_list:', residual_list)
return edit_costs, residual_list, edit_cost_list
return edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat
def _init_worker_gn(gn_toshare):
    """Pool initializer: store the graph list in the worker-global ``G_gn``.

    This lives at module level (instead of being nested inside
    ``compute_geds``) so that it can be pickled when multiprocessing uses the
    'spawn' or 'forkserver' start method.
    """
    global G_gn
    G_gn = gn_toshare


def compute_geds(Gn, edit_cost_constant, idx_nonzeros, parallel=False):
    """Compute pairwise GEDs and edit-operation counts for all graph pairs.

    Every unordered pair ``(i, j)`` with ``i <= j`` (self-pairs included) is
    processed once.

    Parameters
    ----------
    Gn : sequence
        Graphs to compare; elements are handed unchanged to the GED helper.
    edit_cost_constant : list
        Edit costs forwarded to the GED computation.
    idx_nonzeros : list of int
        Positions, within the result of ``get_nb_edit_operations``, of the
        edit-operation counts that are kept.
    parallel : bool, optional
        If True, distribute the pairs over a ``multiprocessing.Pool`` with
        one process per CPU; otherwise run a plain nested loop.

    Returns
    -------
    ged_all : list
        Distances of the pairs in row-major upper-triangular order
        ((0, 0), (0, 1), ..., (1, 1), ...).
    ged_mat : numpy.ndarray
        Symmetric ``len(Gn) x len(Gn)`` matrix holding the same distances.
    n_edit_operations : list of list
        One list per entry of ``idx_nonzeros``, each ordered like ``ged_all``.
    """
    n_graphs = len(Gn)
    ged_mat = np.zeros((n_graphs, n_graphs))
    if parallel:
        # Number of pairs (i, j) with i <= j; exact integer arithmetic.
        len_itr = n_graphs * (n_graphs + 1) // 2
        ged_all = [0] * len_itr
        n_edit_operations = [[0] * len_itr for _ in idx_nonzeros]
        itr = combinations_with_replacement(range(n_graphs), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = len_itr // n_jobs + 1
        else:
            chunksize = 100
        do_partial = partial(_wrapper_compute_ged_parallel,
                             edit_cost_constant, idx_nonzeros)
        # The context manager terminates the workers if anything below
        # raises, so a failure does not leave stray processes behind.
        with Pool(processes=n_jobs, initializer=_init_worker_gn,
                  initargs=(Gn,)) as pool:
            # imap_unordered has no length, so give tqdm the total explicitly.
            iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                            desc='computing GEDs', total=len_itr,
                            file=sys.stdout)
            for i, j, dis, n_eo_tmp in iterator:
                # Results arrive in arbitrary order; map (i, j) back to its
                # position in the row-major upper-triangular enumeration.
                idx_itr = n_graphs * i + j - i * (i + 1) // 2
                ged_all[idx_itr] = dis
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                for idx, item in enumerate(idx_nonzeros):
                    n_edit_operations[idx][idx_itr] = n_eo_tmp[item]
            # Normal completion: shut the pool down gracefully.
            pool.close()
            pool.join()
    else:
        ged_all = []
        n_edit_operations = [[] for _ in idx_nonzeros]
        for i in tqdm(range(n_graphs), desc='computing GEDs', file=sys.stdout):
            for j in range(i, n_graphs):
                # Reuse the helper of the parallel path so both modes run the
                # GED computation with identical settings.
                dis, n_eo_tmp = _compute_ged_parallel(
                    Gn[i], Gn[j], edit_cost_constant, idx_nonzeros)
                ged_all.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                for idx, item in enumerate(idx_nonzeros):
                    n_edit_operations[idx].append(n_eo_tmp[item])

    return ged_all, ged_mat, n_edit_operations
def _wrapper_compute_ged_parallel(edit_cost_constant, idx_nonzeros, itr):
    """Worker entry point: compute the GED for one index pair.

    ``itr`` holds the two graph indices. The graphs themselves are read from
    the module-global ``G_gn``, which the Pool initializer used in
    ``compute_geds`` installs in every worker process.

    Returns the tuple ``(i, j, distance, edit-operation counts)`` so the
    caller can place results that arrive out of order.
    """
    row, col = itr[0], itr[1]
    distance, edit_op_counts = _compute_ged_parallel(
        G_gn[row], G_gn[col], edit_cost_constant, idx_nonzeros)
    return row, col, distance, edit_op_counts
def _compute_ged_parallel(g1, g2, edit_cost_constant, idx_nonzeros):
    """Compute the GED between ``g1`` and ``g2`` and count the edit operations.

    ``edit_cost_constant`` is forwarded to ``GED``; ``idx_nonzeros`` is
    accepted for signature compatibility and is not used here.

    Returns ``(distance, counts)``, where ``counts`` is whatever
    ``get_nb_edit_operations`` yields for the two node maps returned by
    ``GED``.
    """
    ged_options = {
        'lib': 'gedlibpy',
        'cost': 'CONSTANT',
        'method': 'IPFP',
        'edit_cost_constant': edit_cost_constant,
        'stabilizer': 'min',
        'repeat': 50,
    }
    distance, forward_map, backward_map = GED(g1, g2, **ged_options)
    edit_op_counts = get_nb_edit_operations(g1, g2, forward_map, backward_map)
    return distance, edit_op_counts
def get_better_costs(nb_cost_mat, dis_k_vec):
def compute _better_costs(nb_cost_mat, dis_k_vec):
# # method 1: simple least square method.
# # method 1: simple least square method.
# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
# rcond=None)
# rcond=None)
@@ -92,13 +174,11 @@ def get_better_costs(nb_cost_mat, dis_k_vec):
b = 1
b = 1
x = cp.Variable(nb_cost_mat.shape[1])
x = cp.Variable(nb_cost_mat.shape[1])
prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
[G@x <= h,
A@x == b])
[G@x <= h])
prob.solve()
prob.solve()
edit_costs_new = x.value
edit_costs_new = x.value
residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
# p = program(minimize(norm2(nb_cost_mat*x-dis_k_vec)),[equals(sum(x),1),geq(x,0)])
return edit_costs_new, residual
return edit_costs_new, residual
@@ -111,5 +191,8 @@ if __name__ == '__main__':
remove_edges(Gn)
remove_edges(Gn)
gkernel = 'marginalizedkernel'
gkernel = 'marginalizedkernel'
itr_max = 10
itr_max = 10
edit_costs, residual_list, edit_cost_list = \
fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
time0 = time.time()
edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat = \
fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
total_time = time.time() - time0
print('total time:', total_time)