You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

fitDistance.py 21 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Oct 16 14:20:06 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. from tqdm import tqdm
  9. from itertools import combinations_with_replacement, combinations
  10. import multiprocessing
  11. from multiprocessing import Pool
  12. from functools import partial
  13. import time
  14. import random
  15. import sys
  16. from scipy import optimize
  17. from scipy.optimize import minimize
  18. import cvxpy as cp
  19. from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic
  20. from gklearn.preimage.utils import kernel_distance_matrix
  21. def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max,
  22. params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT',
  23. 'method': 'IPFP', 'stabilizer': None},
  24. init_costs=[3, 3, 1, 3, 3, 1],
  25. dataset='monoterpenoides', Kmatrix=None,
  26. parallel=True):
  27. # dataset = dataset.lower()
  28. # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
  29. # random.seed(1)
  30. # cost_rdm = random.sample(range(1, 10), 6)
  31. # init_costs = cost_rdm + [0]
  32. # init_costs = cost_rdm
  33. # init_costs = [3, 3, 1, 3, 3, 1]
  34. # init_costs = [i * 0.01 for i in cost_rdm] + [0]
  35. # init_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
  36. # init_costs = [0, 0, 0.9544, 0.026, 0.0196, 0]
  37. # init_costs = [0.008429912251810438, 0.025461055985319694, 0.2047320869225948, 0.004148727085832133, 0.0, 0]
  38. # idx_cost_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0]
  39. # compute distances in feature space.
  40. dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label,
  41. Kmatrix=Kmatrix, gkernel=gkernel)
  42. dis_k_vec = []
  43. for i in range(len(dis_k_mat)):
  44. # for j in range(i, len(dis_k_mat)):
  45. for j in range(i + 1, len(dis_k_mat)):
  46. dis_k_vec.append(dis_k_mat[i, j])
  47. dis_k_vec = np.array(dis_k_vec)
  48. # init ged.
  49. print('\ninitial:')
  50. time0 = time.time()
  51. params_ged['dataset'] = dataset
  52. params_ged['edit_cost_constant'] = init_costs
  53. ged_vec_init, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
  54. parallel=parallel)
  55. residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
  56. time_list = [time.time() - time0]
  57. edit_cost_list = [init_costs]
  58. nb_cost_mat = np.array(n_edit_operations)
  59. nb_cost_mat_list = [nb_cost_mat]
  60. print('edit_costs:', init_costs)
  61. print('residual_list:', residual_list)
  62. for itr in range(itr_max):
  63. print('\niteration', itr)
  64. time0 = time.time()
  65. # "fit" geds to distances in feature space by tuning edit costs using the
  66. # Least Squares Method.
  67. np.savez('results/xp_fit_method/fit_data_debug' + str(itr) + '.gm',
  68. nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec,
  69. n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init,
  70. ged_mat=ged_mat)
  71. edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec,
  72. dataset=dataset, cost=params_ged['cost'])
  73. for i in range(len(edit_costs_new)):
  74. if -1e-9 <= edit_costs_new[i] <= 1e-9:
  75. edit_costs_new[i] = 0
  76. if edit_costs_new[i] < 0:
  77. raise ValueError('The edit cost is negative.')
  78. # for i in range(len(edit_costs_new)):
  79. # if edit_costs_new[i] < 0:
  80. # edit_costs_new[i] = 0
  81. # compute new GEDs and numbers of edit operations.
  82. params_ged['edit_cost_constant'] = edit_costs_new # np.array([edit_costs_new[0], edit_costs_new[1], 0.75])
  83. ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
  84. parallel=parallel)
  85. residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
  86. time_list.append(time.time() - time0)
  87. edit_cost_list.append(edit_costs_new)
  88. nb_cost_mat = np.array(n_edit_operations)
  89. nb_cost_mat_list.append(nb_cost_mat)
  90. print('edit_costs:', edit_costs_new)
  91. print('residual_list:', residual_list)
  92. return edit_costs_new, residual_list, edit_cost_list, dis_k_mat, ged_mat, \
  93. time_list, nb_cost_mat_list
  94. def compute_geds(Gn, params_ged, parallel=False):
  95. edit_cost_name = params_ged['cost']
  96. if edit_cost_name == 'LETTER' or edit_cost_name == 'LETTER2':
  97. get_nb_eo = get_nb_edit_operations_letter
  98. elif edit_cost_name == 'NON_SYMBOLIC':
  99. get_nb_eo = get_nb_edit_operations_nonsymbolic
  100. else:
  101. get_nb_eo = get_nb_edit_operations
  102. ged_mat = np.zeros((len(Gn), len(Gn)))
  103. if parallel:
  104. # print('parallel')
  105. # len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
  106. len_itr = int(len(Gn) * (len(Gn) - 1) / 2)
  107. ged_vec = [0 for i in range(len_itr)]
  108. n_edit_operations = [0 for i in range(len_itr)]
  109. # itr = combinations_with_replacement(range(0, len(Gn)), 2)
  110. itr = combinations(range(0, len(Gn)), 2)
  111. n_jobs = multiprocessing.cpu_count()
  112. if len_itr < 100 * n_jobs:
  113. chunksize = int(len_itr / n_jobs) + 1
  114. else:
  115. chunksize = 100
  116. def init_worker(gn_toshare):
  117. global G_gn
  118. G_gn = gn_toshare
  119. do_partial = partial(_wrapper_compute_ged_parallel, params_ged, get_nb_eo)
  120. pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,))
  121. iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
  122. desc='computing GEDs', file=sys.stdout)
  123. # iterator = pool.imap_unordered(do_partial, itr, chunksize)
  124. for i, j, dis, n_eo_tmp in iterator:
  125. idx_itr = int(len(Gn) * i + j - (i + 1) * (i + 2) / 2)
  126. ged_vec[idx_itr] = dis
  127. ged_mat[i][j] = dis
  128. ged_mat[j][i] = dis
  129. n_edit_operations[idx_itr] = n_eo_tmp
  130. # print('\n-------------------------------------------')
  131. # print(i, j, idx_itr, dis)
  132. pool.close()
  133. pool.join()
  134. else:
  135. ged_vec = []
  136. n_edit_operations = []
  137. for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
  138. # for i in range(len(Gn)):
  139. for j in range(i + 1, len(Gn)):
  140. dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], **params_ged)
  141. ged_vec.append(dis)
  142. ged_mat[i][j] = dis
  143. ged_mat[j][i] = dis
  144. n_eo_tmp = get_nb_eo(Gn[i], Gn[j], pi_forward, pi_backward)
  145. n_edit_operations.append(n_eo_tmp)
  146. return ged_vec, ged_mat, n_edit_operations
  147. def _wrapper_compute_ged_parallel(params_ged, get_nb_eo, itr):
  148. i = itr[0]
  149. j = itr[1]
  150. dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged, get_nb_eo)
  151. return i, j, dis, n_eo_tmp
  152. def _compute_ged_parallel(g1, g2, params_ged, get_nb_eo):
  153. dis, pi_forward, pi_backward = GED(g1, g2, **params_ged)
  154. n_eo_tmp = get_nb_eo(g1, g2, pi_forward, pi_backward) # [0,0,0,0,0,0]
  155. return dis, n_eo_tmp
  156. def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides',
  157. cost='CONSTANT', rw_constraints='inequality'):
  158. # if dataset == 'Letter-high':
  159. if cost == 'LETTER':
  160. pass
  161. # # method 1: set alpha automatically, just tune c_vir and c_eir by
  162. # # LMS using cvxpy.
  163. # alpha = 0.5
  164. # coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec)
  165. ## if np.count_nonzero(nb_cost_mat[:,4]) == 0:
  166. ## alpha = 0.75
  167. ## else:
  168. ## alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0])
  169. ## alpha = alpha * 0.99
  170. # param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1])
  171. # param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5])
  172. # nb_cost_mat_new = np.column_stack((param_vir, param_eir))
  173. # dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3]
  174. #
  175. # x = cp.Variable(nb_cost_mat_new.shape[1])
  176. # cost = cp.sum_squares(nb_cost_mat_new * x - dis_new)
  177. # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
  178. # prob = cp.Problem(cp.Minimize(cost), constraints)
  179. # prob.solve()
  180. # edit_costs_new = x.value
  181. # edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha])
  182. # residual = np.sqrt(prob.value)
  183. # # method 2: tune c_vir, c_eir and alpha by nonlinear programming by
  184. # # scipy.optimize.minimize.
  185. # w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
  186. # w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
  187. # w2 = nb_cost_mat[:,3]
  188. # w3 = dis_k_vec
  189. # func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
  190. # + w2 * x[2] - w3 * x[3]) ** 2)
  191. # bounds = ((0, None), (0., None), (0.5, 0.5), (0, None))
  192. # res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds)
  193. # edit_costs_new = res.x[0:3]
  194. # residual = res.fun
  195. # method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy.
  196. # # method 4: tune c_vir, c_eir and alpha by QP function
  197. # # scipy.optimize.least_squares. An initial guess is required.
  198. # w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
  199. # w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
  200. # w2 = nb_cost_mat[:,3]
  201. # w3 = dis_k_vec
  202. # func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
  203. # + w2 * x[2] - w3 * x[3]) ** 2
  204. # res = optimize.root(func, [0.9, 1.7, 0.75, 100])
  205. # edit_costs_new = res.x
  206. # residual = None
  207. elif cost == 'LETTER2':
  208. # # 1. if c_vi != c_vr, c_ei != c_er.
  209. # nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  210. # x = cp.Variable(nb_cost_mat_new.shape[1])
  211. # cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  212. ## # 1.1 no constraints.
  213. ## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
  214. # # 1.2 c_vs <= c_vi + c_vr.
  215. # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  216. # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  217. ## # 2. if c_vi == c_vr, c_ei == c_er.
  218. ## nb_cost_mat_new = nb_cost_mat[:,[0,3,4]]
  219. ## nb_cost_mat_new[:,0] += nb_cost_mat[:,1]
  220. ## nb_cost_mat_new[:,2] += nb_cost_mat[:,5]
  221. ## x = cp.Variable(nb_cost_mat_new.shape[1])
  222. ## cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  223. ## # 2.1 no constraints.
  224. ## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
  225. ### # 2.2 c_vs <= c_vi + c_vr.
  226. ### constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  227. ### np.array([2.0, -1.0, 0.0]).T@x >= 0.0]
  228. #
  229. # prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  230. # prob.solve()
  231. # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
  232. # edit_costs_new = np.array(edit_costs_new)
  233. # residual = np.sqrt(prob.value)
  234. if rw_constraints == 'inequality':
  235. # c_vs <= c_vi + c_vr.
  236. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  237. x = cp.Variable(nb_cost_mat_new.shape[1])
  238. cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  239. constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
  240. np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  241. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  242. try:
  243. prob.solve(verbose=True)
  244. except MemoryError as error0:
  245. print('\nUsing solver "OSQP" caused a memory error.')
  246. print('the original error message is\n', error0)
  247. print('solver status: ', prob.status)
  248. print('trying solver "CVXOPT" instead...\n')
  249. try:
  250. prob.solve(solver=cp.CVXOPT, verbose=True)
  251. except Exception as error1:
  252. print('\nAn error occured when using solver "CVXOPT".')
  253. print('the original error message is\n', error1)
  254. print('solver status: ', prob.status)
  255. print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
  256. prob.solve(solver=cp.MOSEK, verbose=True)
  257. else:
  258. print('solver status: ', prob.status)
  259. else:
  260. print('solver status: ', prob.status)
  261. print()
  262. edit_costs_new = x.value
  263. residual = np.sqrt(prob.value)
  264. elif rw_constraints == '2constraints':
  265. # c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er.
  266. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  267. x = cp.Variable(nb_cost_mat_new.shape[1])
  268. cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  269. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
  270. np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
  271. np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0,
  272. np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
  273. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  274. prob.solve()
  275. edit_costs_new = x.value
  276. residual = np.sqrt(prob.value)
  277. elif rw_constraints == 'no-constraint':
  278. # no constraint.
  279. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  280. x = cp.Variable(nb_cost_mat_new.shape[1])
  281. cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  282. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
  283. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  284. prob.solve()
  285. edit_costs_new = x.value
  286. residual = np.sqrt(prob.value)
  287. # elif method == 'inequality_modified':
  288. # # c_vs <= c_vi + c_vr.
  289. # nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  290. # x = cp.Variable(nb_cost_mat_new.shape[1])
  291. # cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  292. # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  293. # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  294. # prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  295. # prob.solve()
  296. # # use same costs for insertion and removal rather than the fitted costs.
  297. # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
  298. # edit_costs_new = np.array(edit_costs_new)
  299. # residual = np.sqrt(prob.value)
  300. elif cost == 'NON_SYMBOLIC':
  301. is_n_attr = np.count_nonzero(nb_cost_mat[:,2])
  302. is_e_attr = np.count_nonzero(nb_cost_mat[:,5])
  303. if dataset == 'SYNTHETICnew':
  304. # nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
  305. nb_cost_mat_new = nb_cost_mat[:,[2,3,4]]
  306. x = cp.Variable(nb_cost_mat_new.shape[1])
  307. cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  308. # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  309. # np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
  310. # constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]]
  311. constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])],
  312. np.array([0.0, 1.0, -1.0]).T@x == 0.0]
  313. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  314. prob.solve()
  315. # print(x.value)
  316. edit_costs_new = np.concatenate((np.array([0.0, 0.0]), x.value,
  317. np.array([0.0])))
  318. residual = np.sqrt(prob.value)
  319. elif rw_constraints == 'inequality':
  320. # c_vs <= c_vi + c_vr.
  321. if is_n_attr and is_e_attr:
  322. nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
  323. x = cp.Variable(nb_cost_mat_new.shape[1])
  324. cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  325. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
  326. np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
  327. np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  328. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  329. prob.solve()
  330. edit_costs_new = x.value
  331. residual = np.sqrt(prob.value)
  332. elif is_n_attr and not is_e_attr:
  333. nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
  334. x = cp.Variable(nb_cost_mat_new.shape[1])
  335. cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  336. constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
  337. np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  338. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  339. prob.solve()
  340. print(x.value)
  341. edit_costs_new = np.concatenate((x.value, np.array([0.0])))
  342. residual = np.sqrt(prob.value)
  343. elif not is_n_attr and is_e_attr:
  344. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  345. x = cp.Variable(nb_cost_mat_new.shape[1])
  346. cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  347. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
  348. np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  349. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  350. prob.solve()
  351. edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
  352. residual = np.sqrt(prob.value)
  353. else:
  354. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
  355. x = cp.Variable(nb_cost_mat_new.shape[1])
  356. cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
  357. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
  358. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  359. prob.solve()
  360. edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
  361. x.value[2:], np.array([0.0])))
  362. residual = np.sqrt(prob.value)
  363. else:
  364. # # method 1: simple least square method.
  365. # edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
  366. # rcond=None)
  367. # # method 2: least square method with x_i >= 0.
  368. # edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec)
  369. # method 3: solve as a quadratic program with constraints.
  370. # P = np.dot(nb_cost_mat.T, nb_cost_mat)
  371. # q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)
  372. # G = -1 * np.identity(nb_cost_mat.shape[1])
  373. # h = np.array([0 for i in range(nb_cost_mat.shape[1])])
  374. # A = np.array([1 for i in range(nb_cost_mat.shape[1])])
  375. # b = 1
  376. # x = cp.Variable(nb_cost_mat.shape[1])
  377. # prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
  378. # [G@x <= h])
  379. # prob.solve()
  380. # edit_costs_new = x.value
  381. # residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
  382. # G = -1 * np.identity(nb_cost_mat.shape[1])
  383. # h = np.array([0 for i in range(nb_cost_mat.shape[1])])
  384. x = cp.Variable(nb_cost_mat.shape[1])
  385. cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
  386. constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
  387. # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  388. np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
  389. np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  390. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  391. prob.solve()
  392. edit_costs_new = x.value
  393. residual = np.sqrt(prob.value)
  394. # method 4:
  395. return edit_costs_new, residual
  396. if __name__ == '__main__':
  397. print('check test_fitDistance.py')

A Python package for graph kernels, graph edit distances and graph pre-image problem.