|
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Wed Oct 16 14:20:06 2019
-
- @author: ljia
- """
- import numpy as np
- from tqdm import tqdm
-
- import sys
- sys.path.insert(0, "../")
- from pygraph.utils.graphfiles import loadDataset
- from ged import GED, get_nb_edit_operations
- from utils import kernel_distance_matrix
-
def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
    """Iteratively tune six constant edit costs so GEDs match kernel distances.

    All six costs start at 1. Each iteration:

    1. computes the graph edit distance (GED) for every pair ``(i, j)`` with
       ``i <= j`` of graphs in ``Gn`` using the current costs, together with
       the six edit-operation counts returned by ``get_nb_edit_operations``;
    2. records the Euclidean norm of ``GEDs - kernel distances``;
    3. solves an unconstrained linear least-squares problem
       ``counts @ costs ~= kernel distances`` and uses its solution as the
       costs for the next iteration.

    Parameters
    ----------
    Gn : sequence of graphs
        Indexable collection handed to ``kernel_distance_matrix``, ``GED``
        and ``get_nb_edit_operations``.
    gkernel
        Forwarded unchanged as ``gkernel=`` to ``kernel_distance_matrix``.
    itr_max : int
        Number of fitting iterations to run.

    Returns
    -------
    tuple
        ``(c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list,
        edit_cost_list)``. The six costs are the values after the last
        update (presumably vertex/edge insertion, removal and substitution
        costs -- inferred from the names, confirm against ``GED``).
        ``residual_list[k]`` is the residual measured with the costs
        ``edit_cost_list[k]``, i.e. *before* the update of iteration ``k``;
        the returned final costs are therefore not part of
        ``edit_cost_list``.
    """
    # Order used everywhere below: [c_vi, c_vr, c_vs, c_ei, c_er, c_es].
    costs = [1, 1, 1, 1, 1, 1]

    # Distances in feature space, flattened as the upper triangle (diagonal
    # included) in row-major order -- the same order the pair loop below uses.
    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel)
    n_rows = len(dis_k_mat)
    dis_k_vec = np.array([dis_k_mat[row, col]
                          for row in range(n_rows)
                          for col in range(row, n_rows)])

    residual_list = []
    edit_cost_list = []
    n_graphs = len(Gn)

    for itr in range(itr_max):
        print('iteration', itr)
        edit_cost_list.append(costs)

        # GED and edit-operation counts for every pair under current costs.
        ged_all = []
        op_counts = []  # one (n_vi, n_vr, n_vs, n_ei, n_er, n_es) row per pair
        for i in tqdm(range(n_graphs), desc='computing GEDs', file=sys.stdout):
            for j in range(i, n_graphs):
                dis, pi_forward, pi_backward = GED(
                    Gn[i], Gn[j], lib='gedlibpy', cost='CONSTANT',
                    method='IPFP', edit_cost_constant=costs,
                    stabilizer='min', repeat=30)
                ged_all.append(dis)
                n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(
                    Gn[i], Gn[j], pi_forward, pi_backward)
                op_counts.append((n_vi, n_vr, n_vs, n_ei, n_er, n_es))

        # Mismatch between the GEDs and the kernel distances for these costs.
        residual_list.append(
            np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec))))

        # "Fit" the GEDs to the feature-space distances by tuning the edit
        # costs with the least squares method; one column per operation kind.
        nb_cost_mat = np.array(op_counts).reshape(-1, 6)
        edit_costs, _, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
                                              rcond=None)

        # Negative costs within 1e-3 of zero are snapped to 0. More negative
        # solutions are deliberately kept as they are (raising a ValueError
        # for them was considered and left disabled).
        tiny_negative = (edit_costs < 0) & (edit_costs > -1e-3)
        edit_costs[tiny_negative] = 0

        costs = list(edit_costs)

    c_vi, c_vr, c_vs, c_ei, c_er, c_es = costs
    return c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list
-
-
-
if __name__ == '__main__':
    # Demo run: fit the edit costs on the first ten graphs of MUTAG.
    from utils import remove_edges

    # node/edge symb
    ds = {
        'name': 'MUTAG',
        'dataset': '../datasets/MUTAG/MUTAG_A.txt',
        'extra_params': {},
    }
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    Gn = Gn[:10]
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    itr_max = 10
    (c_vi, c_vr, c_vs, c_ei, c_er, c_es,
     residual_list, edit_cost_list) = fit_GED_to_kernel_distance(
        Gn, gkernel, itr_max)
|