You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

optim_costs.py 4.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
from itertools import combinations

import numpy as np

from gklearn.ged.model.distances import sum_squares, euclid_d
from gklearn.ged.model.ged_com import compute_geds
  4. def optimize_costs_unlabeled(nb_cost_mat, dis_k_vec):
  5. """
  6. Optimize edit costs to fit dis_k_vec according to edit operations in nb_cost_mat
  7. ! take care that nb_cost_mat do not contains 0 lines
  8. :param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit operations for each pair of graph
  9. :param dis_k_vec: The N distances to fit
  10. """
  11. import cvxpy as cp
  12. import numpy as np
  13. MAX_SAMPLE = 1000
  14. nb_cost_mat_m = np.array([[x[0], x[1], x[3], x[4]] for x in nb_cost_mat])
  15. dis_k_vec = np.array(dis_k_vec)
  16. # dis_k_vec_norm = dis_k_vec/np.max(dis_k_vec)
  17. # import pickle
  18. # pickle.dump([nb_cost_mat, dis_k_vec], open('debug', 'wb'))
  19. N = nb_cost_mat_m.shape[0]
  20. sub_sample = np.random.permutation(np.arange(N))
  21. sub_sample = sub_sample[:MAX_SAMPLE]
  22. x = cp.Variable(nb_cost_mat_m.shape[1])
  23. cost = cp.sum_squares((nb_cost_mat_m[sub_sample, :] @ x) - dis_k_vec[sub_sample])
  24. prob = cp.Problem(cp.Minimize(cost), [x >= 0])
  25. prob.solve()
  26. edit_costs_new = [x.value[0], x.value[1], 0, x.value[2], x.value[3], 0]
  27. edit_costs_new = [xi if xi > 0 else 0 for xi in edit_costs_new]
  28. residual = prob.value
  29. return edit_costs_new, residual
  30. def optimize_costs_classif_unlabeled(nb_cost_mat, Y):
  31. """
  32. Optimize edit costs to fit dis_k_vec according to edit operations in
  33. nb_cost_mat
  34. ! take care that nb_cost_mat do not contains 0 lines
  35. :param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit
  36. operations for each pair of graph
  37. :param dis_k_vec: {-1,1}^N vector of common classes
  38. """
  39. # import cvxpy as cp
  40. from ml import reg_log
  41. # import pickle
  42. # pickle.dump([nb_cost_mat, Y], open('debug', 'wb'))
  43. nb_cost_mat_m = np.array([[x[0], x[1], x[3], x[4]]
  44. for x in nb_cost_mat])
  45. w, J, _ = reg_log(nb_cost_mat_m, Y, pos_contraint=True)
  46. edit_costs_new = [w[0], w[1], 0, w[2], w[3], 0]
  47. residual = J[-1]
  48. return edit_costs_new, residual
  49. def optimize_costs_classif(nb_cost_mat, Y):
  50. """
  51. Optimize edit costs to fit dis_k_vec according to edit operations in nb_cost_mat
  52. ! take care that nb_cost_mat do not contains 0 lines
  53. :param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit operations for each pair of graph
  54. :param dis_k_vec: {-1,1}^N vector of common classes
  55. """
  56. #import pickle
  57. # pickle.dump([nb_cost_mat, Y], open("test.pickle", "wb"))
  58. from ml import reg_log
  59. w, J, _ = reg_log(nb_cost_mat, Y, pos_contraint=True)
  60. return w, J[-1]
  61. def optimize_costs(nb_cost_mat, dis_k_vec):
  62. """
  63. Optimize edit costs to fit dis_k_vec according to edit operations in nb_cost_mat
  64. ! take care that nb_cost_mat do not contains 0 lines
  65. :param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit operations for each pair of graph
  66. :param dis_k_vec: The N distances to fit
  67. """
  68. import cvxpy as cp
  69. x = cp.Variable(nb_cost_mat.shape[1])
  70. cost = cp.sum_squares((nb_cost_mat @ x) - dis_k_vec)
  71. constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])],
  72. np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
  73. np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  74. prob = cp.Problem(cp.Minimize(cost), constraints)
  75. prob.solve()
  76. edit_costs_new = x.value
  77. residual = prob.value
  78. return edit_costs_new, residual
  79. def compute_optimal_costs(G, y, init_costs=[3, 3, 1, 3, 3, 1],
  80. y_distance=euclid_d,
  81. mode='reg', unlabeled=False,
  82. ed_method='BIPARTITE',
  83. verbose=True,
  84. **kwargs):
  85. N = len(y)
  86. G_pairs = []
  87. distances_vec = []
  88. for i in range(N):
  89. for j in range(i+1, N):
  90. G_pairs.append([i, j])
  91. distances_vec.append(y_distance(y[i], y[j]))
  92. ged_vec_init, n_edit_operations = compute_geds(G_pairs, G, init_costs, ed_method,
  93. verbose=verbose, **kwargs)
  94. residual_list = [sum_squares(ged_vec_init, distances_vec)]
  95. if (mode == 'reg'):
  96. if unlabeled:
  97. method_optim = optimize_costs_unlabeled
  98. else:
  99. method_optim = optimize_costs
  100. elif (mode == 'classif'):
  101. if unlabeled:
  102. method_optim = optimize_costs_classif_unlabeled
  103. else:
  104. method_optim = optimize_costs_classif
  105. ite_max = 5
  106. for i in range(ite_max):
  107. if verbose:
  108. print('ite', i + 1, '/', ite_max, ':')
  109. # compute GEDs and numbers of edit operations.
  110. edit_costs_new, residual = method_optim(
  111. np.array(n_edit_operations), distances_vec)
  112. ged_vec, n_edit_operations = compute_geds(G_pairs, G, edit_costs_new, ed_method,
  113. verbose=verbose, **kwargs)
  114. residual_list.append(sum_squares(ged_vec, distances_vec))
  115. return edit_costs_new
  116. def get_optimal_costs_GH2020(**kwargs):
  117. import pickle
  118. import os
  119. dir_root = 'cj/output/'
  120. ds_name = kwargs.get('ds_name')
  121. nb_trial = kwargs.get('nb_trial')
  122. file_name = os.path.join(dir_root, 'costs.' + ds_name + '.' + str(nb_trial) + '.pkl')
  123. with open(file_name, 'rb') as f:
  124. edit_costs = pickle.load(f)
  125. return edit_costs

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.