You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

fitDistance.py 4.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Oct 16 14:20:06 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. from tqdm import tqdm
  9. from scipy import optimize
  10. import cvxpy as cp
  11. import sys
  12. sys.path.insert(0, "../")
  13. from pygraph.utils.graphfiles import loadDataset
  14. from ged import GED, get_nb_edit_operations
  15. from utils import kernel_distance_matrix
  def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
      """Alternate between computing GEDs and refitting constant edit costs.

      Each of the ``itr_max`` rounds computes the graph edit distance (via
      ``GED`` with the current constant costs) for every pair ``(i, j)`` with
      ``i <= j`` of ``Gn``, records how many edit operations of each fitted
      kind were used (``get_nb_edit_operations``), and asks
      ``get_better_costs`` for new costs that bring the linear cost model
      closer to the pairwise distances from ``kernel_distance_matrix``.

      Costs whose initial value is zero are excluded from the fit and are
      never updated.

      Parameters
      ----------
      Gn : sequence
          Graphs; must support ``len`` and integer indexing.
      gkernel
          Forwarded to ``kernel_distance_matrix`` as ``gkernel``.
      itr_max : int
          Number of fitting rounds.

      Returns
      -------
      edit_costs : list
          The six costs after the last round.
      residual_list : list
          Per round, the Euclidean norm of (GEDs - kernel distances),
          measured with the costs used in that round.
      edit_cost_list : list of list
          The costs used in each round.

      Raises
      ------
      ValueError
          If a fitted cost is negative by 1e-6 or more.
      """
      # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
      edit_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
      fitted_idx = [pos for pos, cost in enumerate(edit_costs) if cost != 0]

      # Distances in feature space, flattened over the pairs (i, j), i <= j,
      # in the same order the GEDs are collected below.
      dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel)
      n_rows = len(dis_k_mat)
      dis_k_vec = np.array([dis_k_mat[row, col]
                            for row in range(n_rows)
                            for col in range(row, n_rows)])

      residual_list = []
      edit_cost_list = []

      for itr in range(itr_max):
          print('iteration', itr)

          # Snapshot the costs used in this round so later updates to
          # ``edit_costs`` do not alter the recorded history.
          current_costs = list(edit_costs)
          edit_cost_list.append(current_costs)

          # Compute GEDs and numbers of edit operations.
          ged_values = []
          op_counts = [[] for _ in fitted_idx]
          n_graphs = len(Gn)
          for i in tqdm(range(n_graphs), desc='computing GEDs',
                        file=sys.stdout):
              for j in range(i, n_graphs):
                  dis, pi_forward, pi_backward = GED(
                      Gn[i], Gn[j], lib='gedlibpy', cost='CONSTANT',
                      method='IPFP', edit_cost_constant=current_costs,
                      stabilizer='min', repeat=30)
                  ged_values.append(dis)
                  n_eo = get_nb_edit_operations(Gn[i], Gn[j],
                                                pi_forward, pi_backward)
                  for slot, cost_pos in enumerate(fitted_idx):
                      op_counts[slot].append(n_eo[cost_pos])

          gap = np.array(ged_values) - dis_k_vec
          residual_list.append(np.sqrt(np.sum(np.square(gap))))

          # "Fit" GEDs to distances in feature space by tuning the edit
          # costs; one row per graph pair, one column per fitted cost.
          nb_cost_mat = np.array(op_counts).T
          edit_costs_new, fit_residual = get_better_costs(nb_cost_mat,
                                                          dis_k_vec)
          print(fit_residual)

          # Tiny negative values are clamped to zero; anything more negative
          # is reported as an error.
          for slot, cost in enumerate(edit_costs_new):
              if cost < 0:
                  if cost <= -1e-6:
                      raise ValueError('The edit cost is negative.')
                  edit_costs_new[slot] = 0

          for slot, cost_pos in enumerate(fitted_idx):
              edit_costs[cost_pos] = edit_costs_new[slot]

      return edit_costs, residual_list, edit_cost_list
  def get_better_costs(nb_cost_mat, dis_k_vec):
      """Fit edit costs to target distances with a constrained quadratic program.

      Minimizes ``||nb_cost_mat @ x - dis_k_vec||^2`` subject to ``x >= 0``
      and ``sum(x) == 1``. Unconstrained least squares
      (``np.linalg.lstsq``) or non-negative least squares
      (``scipy.optimize.nnls``) would be alternatives without the
      ``sum(x) == 1`` constraint.

      Parameters
      ----------
      nb_cost_mat : numpy.ndarray
          2-D array; ``nb_cost_mat.shape[1]`` is the number of costs fitted.
      dis_k_vec : numpy.ndarray
          Target distances, one per row of ``nb_cost_mat``.

      Returns
      -------
      edit_costs_new : numpy.ndarray
          The solver's value for ``x``.
      residual : float
          Squared residual ``||nb_cost_mat @ x - dis_k_vec||^2`` at the
          solution.

      Raises
      ------
      ValueError
          If the solver does not produce a solution.
      """
      n_costs = nb_cost_mat.shape[1]

      # ||N x - d||^2 = x'N'N x - 2 d'N x + d'd. The constant d'd does not
      # affect the minimizer, so only the first two terms form the objective.
      P = np.dot(nb_cost_mat.T, nb_cost_mat)
      q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)

      # Constraints: -x <= 0 (non-negativity) and 1'x == 1.
      G = -1 * np.identity(n_costs)
      h = np.zeros(n_costs)
      A = np.ones(n_costs)
      b = 1

      x = cp.Variable(n_costs)
      prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T @ x),
                        [G @ x <= h,
                         A @ x == b])
      prob.solve()

      if x.value is None:
          # Fail here with a clear message instead of returning None and
          # letting the caller crash on it.
          raise ValueError('The QP solver found no solution (status: %s).'
                           % prob.status)

      edit_costs_new = x.value
      # Add back the constant d'd that was dropped from the objective to
      # obtain the actual squared residual.
      residual = prob.value + np.dot(dis_k_vec.T, dis_k_vec)
      return edit_costs_new, residual
  if __name__ == '__main__':
      from utils import remove_edges

      # Dataset description; the original note on this entry reads
      # "node/edge symb".
      ds = {
          'name': 'MUTAG',
          'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {},
      }
      Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
      # To experiment on a small subset, slice here: Gn = Gn[0:10]
      remove_edges(Gn)

      gkernel = 'marginalizedkernel'
      itr_max = 10
      (edit_costs,
       residual_list,
       edit_cost_list) = fit_GED_to_kernel_distance(Gn, gkernel, itr_max)

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.