You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

fitDistance.py 3.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Oct 16 14:20:06 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. from tqdm import tqdm
  9. import sys
  10. sys.path.insert(0, "../")
  11. from pygraph.utils.graphfiles import loadDataset
  12. from ged import GED, get_nb_edit_operations
  13. from utils import kernel_distance_matrix
  14. def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
  15. c_vi = 1
  16. c_vr = 1
  17. c_vs = 1
  18. c_ei = 1
  19. c_er = 1
  20. c_es = 1
  21. # compute distances in feature space.
  22. dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel)
  23. dis_k_vec = []
  24. for i in range(len(dis_k_mat)):
  25. for j in range(i, len(dis_k_mat)):
  26. dis_k_vec.append(dis_k_mat[i, j])
  27. dis_k_vec = np.array(dis_k_vec)
  28. residual_list = []
  29. edit_cost_list = []
  30. for itr in range(itr_max):
  31. print('iteration', itr)
  32. ged_all = []
  33. n_vi_all = []
  34. n_vr_all = []
  35. n_vs_all = []
  36. n_ei_all = []
  37. n_er_all = []
  38. n_es_all = []
  39. # compute GEDs and numbers of edit operations.
  40. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  41. edit_cost_list.append(edit_cost_constant)
  42. for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
  43. # for i in range(len(Gn)):
  44. for j in range(i, len(Gn)):
  45. dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], lib='gedlibpy',
  46. cost='CONSTANT', method='IPFP',
  47. edit_cost_constant=edit_cost_constant, stabilizer='min',
  48. repeat=30)
  49. ged_all.append(dis)
  50. n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(Gn[i],
  51. Gn[j], pi_forward, pi_backward)
  52. n_vi_all.append(n_vi)
  53. n_vr_all.append(n_vr)
  54. n_vs_all.append(n_vs)
  55. n_ei_all.append(n_ei)
  56. n_er_all.append(n_er)
  57. n_es_all.append(n_es)
  58. residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec)))
  59. residual_list.append(residual)
  60. # "fit" geds to distances in feature space by tuning edit costs using the
  61. # Least Squares Method.
  62. nb_cost_mat = np.column_stack((np.array(n_vi_all), np.array(n_vr_all),
  63. np.array(n_vs_all), np.array(n_ei_all),
  64. np.array(n_er_all), np.array(n_es_all)))
  65. edit_costs, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
  66. rcond=None)
  67. for i in range(len(edit_costs)):
  68. if edit_costs[i] < 0:
  69. if edit_costs[i] > -1e-3:
  70. edit_costs[i] = 0
  71. # else:
  72. # raise ValueError('The edit cost is negative.')
  73. c_vi = edit_costs[0]
  74. c_vr = edit_costs[1]
  75. c_vs = edit_costs[2]
  76. c_ei = edit_costs[3]
  77. c_er = edit_costs[4]
  78. c_es = edit_costs[5]
  79. return c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list
  80. if __name__ == '__main__':
  81. from utils import remove_edges
  82. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  83. 'extra_params': {}} # node/edge symb
  84. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  85. Gn = Gn[0:10]
  86. remove_edges(Gn)
  87. gkernel = 'marginalizedkernel'
  88. itr_max = 10
  89. c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list = \
  90. fit_GED_to_kernel_distance(Gn, gkernel, itr_max)

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.