You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

optim_costs.py 4.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. from ged import compute_geds
  2. from distances import sum_squares, euclid_d
  3. import numpy as np
  4. # from tqdm import tqdm
  5. import sys
  6. # sys.path.insert(0, "../")
  def optimize_costs_unlabeled(nb_cost_mat, dis_k_vec):
      r"""Fit non-negative edit costs for unlabeled graphs by least squares.

      Solves ``min_x ||A x - d||^2`` subject to ``x >= 0``, where ``A`` holds
      columns 0, 1, 3 and 4 of ``nb_cost_mat`` and ``d`` is ``dis_k_vec``.
      Columns 2 and 5 are ignored and their costs are returned as 0.  The rows
      are randomly permuted and at most ``MAX_SAMPLE`` of them are used; the
      permutation comes from NumPy's global random state, so results can vary
      between calls unless the caller seeds it.

      ! Take care that nb_cost_mat does not contain all-zero lines.

      :param nb_cost_mat: \in \mathbb{N}^{N x 6}, the number of edit operations
          of each kind for each pair of graphs
      :param dis_k_vec: the N distances to fit
      :return: ``(edit_costs_new, residual)``: a list of 6 costs (entries 2
          and 5 are always 0) and the optimal objective value reported by the
          solver for the sampled rows
      """
      import cvxpy as cp

      MAX_SAMPLE = 1000  # upper bound on the number of rows given to the solver

      # Keep only the four columns whose costs are actually fitted.
      counts = np.asarray(nb_cost_mat)[:, [0, 1, 3, 4]]
      targets = np.asarray(dis_k_vec)

      n_pairs = counts.shape[0]
      rows = np.random.permutation(n_pairs)[:MAX_SAMPLE]

      x = cp.Variable(counts.shape[1])
      cost = cp.sum_squares(counts[rows, :] @ x - targets[rows])
      prob = cp.Problem(cp.Minimize(cost), [x >= 0])
      prob.solve()

      fitted = x.value
      # Re-insert the two ignored positions (2 and 5) with a cost of 0.
      edit_costs_new = [fitted[0], fitted[1], 0, fitted[2], fitted[3], 0]
      # Clamp anything that is not strictly positive to 0 (presumably guards
      # against small negative values caused by solver tolerance).
      edit_costs_new = [c if c > 0 else 0 for c in edit_costs_new]
      return edit_costs_new, prob.value
  def optimize_costs_classif_unlabeled(nb_cost_mat, Y):
      r"""Fit edit costs for unlabeled graphs with ``ml.reg_log``.

      Only columns 0, 1, 3 and 4 of ``nb_cost_mat`` are passed to ``reg_log``;
      the costs at positions 2 and 5 of the result are fixed to 0.

      ! Take care that nb_cost_mat does not contain all-zero lines.

      :param nb_cost_mat: \in \mathbb{N}^{N x 6}, the number of edit
          operations of each kind for each pair of graphs
      :param Y: {-1,1}^N vector of common classes
      :return: ``(edit_costs_new, residual)``: a list of 6 costs and the last
          element of the second value returned by ``reg_log`` (presumably the
          final value of its loss history -- confirm against ``ml.reg_log``)
      """
      from ml import reg_log

      # Keep only the four columns whose costs are actually fitted.
      counts = np.asarray(nb_cost_mat)[:, [0, 1, 3, 4]]

      # NOTE: ``pos_contraint`` is the keyword spelling used by the existing
      # call to reg_log; it must not be "corrected" here.
      w, J, _ = reg_log(counts, Y, pos_contraint=True)

      # Re-insert the two ignored positions (2 and 5) with a cost of 0.
      edit_costs_new = [w[0], w[1], 0, w[2], w[3], 0]
      residual = J[-1]
      return edit_costs_new, residual
  def optimize_costs_classif(nb_cost_mat, Y):
      r"""Fit edit costs with ``ml.reg_log`` using all columns of nb_cost_mat.

      ! Take care that nb_cost_mat does not contain all-zero lines.

      :param nb_cost_mat: \in \mathbb{N}^{N x 6}, the number of edit operations
          of each kind for each pair of graphs
      :param Y: {-1,1}^N vector of common classes
      :return: ``(w, residual)``: the weights returned by ``reg_log`` and the
          last element of its second return value (presumably the final value
          of its loss history -- confirm against ``ml.reg_log``)
      """
      from ml import reg_log

      # NOTE: ``pos_contraint`` is the keyword spelling used by the existing
      # call to reg_log; it must not be "corrected" here.
      w, J, _ = reg_log(nb_cost_mat, Y, pos_contraint=True)
      return w, J[-1]
  def optimize_costs(nb_cost_mat, dis_k_vec):
      r"""Fit the six edit costs to target distances by constrained least squares.

      Solves ``min_x ||A x - d||^2`` with ``A = nb_cost_mat`` and
      ``d = dis_k_vec`` under the constraints

      * every cost is at least 0.01,
      * ``x[0] + x[1] >= x[2]``,
      * ``x[3] + x[4] >= x[5]``.

      The two sum constraints are written with length-6 coefficient vectors,
      so ``nb_cost_mat`` must have exactly 6 columns.

      ! Take care that nb_cost_mat does not contain all-zero lines.

      :param nb_cost_mat: \in \mathbb{N}^{N x 6}, the number of edit operations
          of each kind for each pair of graphs
      :param dis_k_vec: the N distances to fit
      :return: ``(edit_costs_new, residual)``: the fitted cost vector
          (``x.value``) and the optimal objective value reported by the solver
      """
      import cvxpy as cp

      # Accept plain sequences as well as arrays, as optimize_costs_unlabeled
      # already does.
      nb_cost_mat = np.asarray(nb_cost_mat)
      dis_k_vec = np.asarray(dis_k_vec)

      x = cp.Variable(nb_cost_mat.shape[1])
      cost = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
      constraints = [
          # Lower bound applied element-wise to every cost.
          x >= 0.01,
          # x[0] + x[1] - x[2] >= 0
          np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]) @ x >= 0.0,
          # x[3] + x[4] - x[5] >= 0
          np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]) @ x >= 0.0,
      ]
      prob = cp.Problem(cp.Minimize(cost), constraints)
      prob.solve()

      edit_costs_new = x.value
      residual = prob.value
      return edit_costs_new, residual
  def compute_optimal_costs(G, y, init_costs=None,
                            y_distance=euclid_d,
                            mode='reg', unlabeled=False,
                            ed_method='BIPARTITE',
                            **kwargs):
      """Alternate between computing GEDs and re-fitting the edit costs.

      For every unordered pair ``(i, j)`` of indices into ``y`` the target
      value ``y_distance(y[i], y[j])`` is computed.  Starting from
      ``init_costs``, the function then repeats a fixed number of times:
      fit new costs from the current edit-operation counts, and recompute
      the GEDs and counts with ``compute_geds`` using those new costs.

      :param G: the graphs, passed through to ``compute_geds``
      :param y: one target per graph; ``len(y)`` defines the number of graphs
      :param init_costs: initial edit costs; defaults to ``[3, 3, 1, 3, 3, 1]``
      :param y_distance: callable giving the target value for a pair of
          entries of ``y``
      :param mode: ``'reg'`` or ``'classif'``; selects the cost-fitting routine
      :param unlabeled: if true, use the ``*_unlabeled`` fitting routine
      :param ed_method: edit-distance method name passed to ``compute_geds``
      :param kwargs: extra keyword arguments passed to ``compute_geds``
      :return: the edit costs produced by the last fitting step
      :raises ValueError: if ``mode`` is neither ``'reg'`` nor ``'classif'``
      """
      # A fresh list per call avoids sharing one mutable default between calls.
      if init_costs is None:
          init_costs = [3, 3, 1, 3, 3, 1]

      # Select the fitting routine up front so that an invalid mode fails
      # before any expensive GED computation.
      if mode == 'reg':
          method_optim = optimize_costs_unlabeled if unlabeled else optimize_costs
      elif mode == 'classif':
          if unlabeled:
              method_optim = optimize_costs_classif_unlabeled
          else:
              method_optim = optimize_costs_classif
      else:
          raise ValueError(
              "mode must be 'reg' or 'classif', got {!r}".format(mode))

      N = len(y)
      G_pairs = [[i, j] for i in range(N) for j in range(i + 1, N)]
      distances_vec = [y_distance(y[i], y[j]) for i, j in G_pairs]

      ged_vec_init, n_edit_operations = compute_geds(
          G_pairs, G, init_costs, ed_method, **kwargs)

      # NOTE(review): residual_list is tracked but never returned or printed;
      # it is kept so the existing sum_squares calls are preserved.
      residual_list = [sum_squares(ged_vec_init, distances_vec)]

      ite_max = 5
      for i in range(ite_max):
          print('ite', i + 1, '/', ite_max, ':')
          # Fit new costs from the current numbers of edit operations ...
          edit_costs_new, _ = method_optim(
              np.array(n_edit_operations), distances_vec)
          # ... then recompute GEDs and operation counts with those costs.
          ged_vec, n_edit_operations = compute_geds(
              G_pairs, G, edit_costs_new, ed_method, **kwargs)
          residual_list.append(sum_squares(ged_vec, distances_vec))

      return edit_costs_new

A Python package for graph kernels, graph edit distances and graph pre-image problem.