You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

median_preimage_generator.py 46 kB

4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Mar 26 18:27:22 2020
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import time
  9. import random
  10. import multiprocessing
  11. import networkx as nx
  12. import cvxpy as cp
  13. from gklearn.preimage import PreimageGenerator
  14. from gklearn.preimage.utils import compute_k_dis
  15. from gklearn.ged.util import compute_geds, ged_options_to_string
  16. from gklearn.ged.median import MedianGraphEstimator
  17. from gklearn.ged.median import constant_node_costs,mge_options_to_string
  18. from gklearn.gedlib import librariesImport, gedlibpy
  19. from gklearn.utils import Timer
  20. from gklearn.utils.utils import get_graph_kernel_by_name
  21. class MedianPreimageGenerator(PreimageGenerator):
  22. def __init__(self, dataset=None):
  23. PreimageGenerator.__init__(self, dataset=dataset)
  24. # arguments to set.
  25. self._mge = None
  26. self._ged_options = {}
  27. self._mge_options = {}
  28. self._fit_method = 'k-graphs'
  29. self._init_ecc = None
  30. self._parallel = True
  31. self._n_jobs = multiprocessing.cpu_count()
  32. self._ds_name = None
  33. self._time_limit_in_sec = 0
  34. self._max_itrs = 100
  35. self._max_itrs_without_update = 3
  36. self._epsilon_residual = 0.01
  37. self._epsilon_ec = 0.1
  38. self._allow_zeros = False
  39. self._triangle_rule = True
  40. # values to compute.
  41. self._runtime_optimize_ec = None
  42. self._runtime_generate_preimage = None
  43. self._runtime_total = None
  44. self._set_median = None
  45. self._gen_median = None
  46. self._best_from_dataset = None
  47. self._sod_set_median = None
  48. self._sod_gen_median = None
  49. self._k_dis_set_median = None
  50. self._k_dis_gen_median = None
  51. self._k_dis_dataset = None
  52. self._itrs = 0
  53. self._converged = False
  54. self._num_updates_ecc = 0
  55. # values that can be set or to be computed.
  56. self._edit_cost_constants = []
  57. self._gram_matrix_unnorm = None
  58. self._runtime_precompute_gm = None
  59. def set_options(self, **kwargs):
  60. self._kernel_options = kwargs.get('kernel_options', {})
  61. self._graph_kernel = kwargs.get('graph_kernel', None)
  62. self._verbose = kwargs.get('verbose', 2)
  63. self._ged_options = kwargs.get('ged_options', {})
  64. self._mge_options = kwargs.get('mge_options', {})
  65. self._fit_method = kwargs.get('fit_method', 'k-graphs')
  66. self._init_ecc = kwargs.get('init_ecc', None)
  67. self._edit_cost_constants = kwargs.get('edit_cost_constants', [])
  68. self._parallel = kwargs.get('parallel', True)
  69. self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
  70. self._ds_name = kwargs.get('ds_name', None)
  71. self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
  72. self._max_itrs = kwargs.get('max_itrs', 100)
  73. self._max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
  74. self._epsilon_residual = kwargs.get('epsilon_residual', 0.01)
  75. self._epsilon_ec = kwargs.get('epsilon_ec', 0.1)
  76. self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
  77. self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
  78. self._allow_zeros = kwargs.get('allow_zeros', False)
  79. self._triangle_rule = kwargs.get('triangle_rule', True)
def run(self):
    """Generate a median preimage for the dataset.

    Pipeline: (1) obtain or compute the (possibly normalized) Gram
    matrix of the graph kernel, (2) optimize the edit cost constants,
    (3) compute the set median and generalized median graphs with the
    optimized costs, (4) compute kernel distances to the true median.
    Stage runtimes and results are stored on the instance and printed
    according to `self._verbose`.
    """
    self._graph_kernel = get_graph_kernel_by_name(self._kernel_options['name'],
                                                  node_labels=self._dataset.node_labels,
                                                  edge_labels=self._dataset.edge_labels,
                                                  node_attrs=self._dataset.node_attrs,
                                                  edge_attrs=self._dataset.edge_attrs,
                                                  ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
                                                  kernel_options=self._kernel_options)
    # record start time.
    start = time.time()
    # 1. precompute gram matrix.
    if self._gram_matrix_unnorm is None:
        gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
        self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
        end_precompute_gm = time.time()
        self._runtime_precompute_gm = end_precompute_gm - start
    else:
        # a pre-computed Gram matrix was supplied; its (externally
        # measured) computation time must be supplied as well.
        if self._runtime_precompute_gm is None:
            raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
        self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm
        if self._kernel_options['normalize']:
            self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm))
        else:
            self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm)
        end_precompute_gm = time.time()
        # shift the start back so the external precompute time is
        # included in the reported total runtime.
        start -= self._runtime_precompute_gm
    if self._fit_method != 'k-graphs' and self._fit_method != 'whole-dataset':
        # other fit methods do not use the Gram matrix, so exclude its
        # cost from all timings.
        start = time.time()
        self._runtime_precompute_gm = 0
        end_precompute_gm = start
    # 2. optimize edit cost constants.
    self._optimize_edit_cost_constants()
    end_optimize_ec = time.time()
    self._runtime_optimize_ec = end_optimize_ec - end_precompute_gm
    # 3. compute set median and gen median using optimized edit costs.
    if self._verbose >= 2:
        print('\nstart computing set median and gen median using optimized edit costs...\n')
    self._gmg_bcu()
    end_generate_preimage = time.time()
    self._runtime_generate_preimage = end_generate_preimage - end_optimize_ec
    self._runtime_total = end_generate_preimage - start
    if self._verbose >= 2:
        print('medians computed.')
        print('SOD of the set median: ', self._sod_set_median)
        print('SOD of the generalized median: ', self._sod_gen_median)
    # 4. compute kernel distances to the true median.
    if self._verbose >= 2:
        print('\nstart computing distances to true median....\n')
    self._compute_distances_to_true_median()
    # 5. print out results.
    if self._verbose:
        print()
        print('================================================================================')
        print('Finished generation of preimages.')
        print('--------------------------------------------------------------------------------')
        print('The optimized edit cost constants:', self._edit_cost_constants)
        print('SOD of the set median:', self._sod_set_median)
        print('SOD of the generalized median:', self._sod_gen_median)
        print('Distance in kernel space for set median:', self._k_dis_set_median)
        print('Distance in kernel space for generalized median:', self._k_dis_gen_median)
        print('Minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
        print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
        print('Time to optimize edit costs:', self._runtime_optimize_ec)
        print('Time to generate pre-images:', self._runtime_generate_preimage)
        print('Total time:', self._runtime_total)
        print('Total number of iterations for optimizing:', self._itrs)
        print('Total number of updating edit costs:', self._num_updates_ecc)
        print('Is optimization of edit costs converged:', self._converged)
        print('================================================================================')
        print()
  150. def get_results(self):
  151. results = {}
  152. results['edit_cost_constants'] = self._edit_cost_constants
  153. results['runtime_precompute_gm'] = self._runtime_precompute_gm
  154. results['runtime_optimize_ec'] = self._runtime_optimize_ec
  155. results['runtime_generate_preimage'] = self._runtime_generate_preimage
  156. results['runtime_total'] = self._runtime_total
  157. results['sod_set_median'] = self._sod_set_median
  158. results['sod_gen_median'] = self._sod_gen_median
  159. results['k_dis_set_median'] = self._k_dis_set_median
  160. results['k_dis_gen_median'] = self._k_dis_gen_median
  161. results['k_dis_dataset'] = self._k_dis_dataset
  162. results['itrs'] = self._itrs
  163. results['converged'] = self._converged
  164. results['num_updates_ecc'] = self._num_updates_ecc
  165. results['mge'] = {}
  166. results['mge']['num_decrease_order'] = self._mge.get_num_times_order_decreased()
  167. results['mge']['num_increase_order'] = self._mge.get_num_times_order_increased()
  168. results['mge']['num_converged_descents'] = self._mge.get_num_converged_descents()
  169. # results['ged_matrix_set_median'] = self._mge.ged_matrix_set_median_tmp
  170. return results
  171. def _optimize_edit_cost_constants(self):
  172. """fit edit cost constants.
  173. """
  174. if self._fit_method == 'random': # random
  175. if self._ged_options['edit_cost'] == 'LETTER':
  176. self._edit_cost_constants = random.sample(range(1, 1000), 3)
  177. self._edit_cost_constants = [item * 0.001 for item in self._edit_cost_constants]
  178. elif self._ged_options['edit_cost'] == 'LETTER2':
  179. random.seed(time.time())
  180. self._edit_cost_constants = random.sample(range(1, 1000), 5)
  181. self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
  182. elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
  183. self._edit_cost_constants = random.sample(range(1, 1000), 6)
  184. self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
  185. if self._dataset.node_attrs == []:
  186. self._edit_cost_constants[2] = 0
  187. if self._dataset.edge_attrs == []:
  188. self._edit_cost_constants[5] = 0
  189. else:
  190. self._edit_cost_constants = random.sample(range(1, 1000), 6)
  191. self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
  192. if self._verbose >= 2:
  193. print('edit cost constants used:', self._edit_cost_constants)
  194. elif self._fit_method == 'expert': # expert
  195. if self._init_ecc is None:
  196. if self._ged_options['edit_cost'] == 'LETTER':
  197. self._edit_cost_constants = [0.9, 1.7, 0.75]
  198. elif self._ged_options['edit_cost'] == 'LETTER2':
  199. self._edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
  200. else:
  201. self._edit_cost_constants = [3, 3, 1, 3, 3, 1]
  202. else:
  203. self._edit_cost_constants = self._init_ecc
  204. elif self._fit_method == 'k-graphs':
  205. if self._init_ecc is None:
  206. if self._ged_options['edit_cost'] == 'LETTER':
  207. self._init_ecc = [0.9, 1.7, 0.75]
  208. elif self._ged_options['edit_cost'] == 'LETTER2':
  209. self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
  210. elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
  211. self._init_ecc = [0, 0, 1, 1, 1, 0]
  212. if self._dataset.node_attrs == []:
  213. self._init_ecc[2] = 0
  214. if self._dataset.edge_attrs == []:
  215. self._init_ecc[5] = 0
  216. else:
  217. self._init_ecc = [3, 3, 1, 3, 3, 1]
  218. # optimize on the k-graph subset.
  219. self._optimize_ecc_by_kernel_distances()
  220. elif self._fit_method == 'whole-dataset':
  221. if self._init_ecc is None:
  222. if self._ged_options['edit_cost'] == 'LETTER':
  223. self._init_ecc = [0.9, 1.7, 0.75]
  224. elif self._ged_options['edit_cost'] == 'LETTER2':
  225. self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
  226. else:
  227. self._init_ecc = [3, 3, 1, 3, 3, 1]
  228. # optimizeon the whole set.
  229. self._optimize_ecc_by_kernel_distances()
  230. elif self._fit_method == 'precomputed':
  231. pass
def _optimize_ecc_by_kernel_distances(self):
    """Fit the edit cost constants so GEDs mimic kernel distances.

    Iteratively: compute GEDs and per-pair edit-operation counts with
    the current costs, then solve a fitting problem (`_update_ecc`)
    mapping operation counts to the kernel-distance vector; repeat until
    the costs and the residual stop changing (relative change below
    `_epsilon_ec` / `_epsilon_residual`) or `_termination_criterion_met`
    fires. Updates `_edit_cost_constants`, `_itrs`, `_converged` and
    `_num_updates_ecc` in place.
    """
    # compute distances in feature space.
    dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix()
    # flatten the strict upper triangle into a vector; presumably the pair
    # order matches the GED vector returned by compute_geds — TODO confirm.
    dis_k_vec = []
    for i in range(len(dis_k_mat)):
        # for j in range(i, len(dis_k_mat)):
        for j in range(i + 1, len(dis_k_mat)):
            dis_k_vec.append(dis_k_mat[i, j])
    dis_k_vec = np.array(dis_k_vec)
    # init ged.
    if self._verbose >= 2:
        print('\ninitial:')
    time0 = time.time()
    graphs = [self._clean_graph(g) for g in self._dataset.graphs]
    self._edit_cost_constants = self._init_ecc
    options = self._ged_options.copy()
    options['edit_cost_constants'] = self._edit_cost_constants # @todo
    options['node_labels'] = self._dataset.node_labels
    options['edge_labels'] = self._dataset.edge_labels
    options['node_attrs'] = self._dataset.node_attrs
    options['edge_attrs'] = self._dataset.edge_attrs
    ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self._parallel, verbose=(self._verbose > 1))
    # residual = Euclidean norm of (GED vector - kernel-distance vector).
    residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
    time_list = [time.time() - time0]
    edit_cost_list = [self._init_ecc]
    nb_cost_mat = np.array(n_edit_operations)
    nb_cost_mat_list = [nb_cost_mat]
    if self._verbose >= 2:
        print('Current edit cost constants:', self._edit_cost_constants)
        print('Residual list:', residual_list)
    # run iteration from initial edit costs.
    self._converged = False
    itrs_without_update = 0
    self._itrs = 0
    self._num_updates_ecc = 0
    timer = Timer(self._time_limit_in_sec)
    while not self._termination_criterion_met(self._converged, timer, self._itrs, itrs_without_update):
        if self._verbose >= 2:
            print('\niteration', self._itrs + 1)
        time0 = time.time()
        # "fit" GEDs to distances in feature space by tuning edit costs
        # using the least squares method.
        # np.savez('results/xp_fit_method/fit_data_debug' + str(self._itrs) + '.gm',
        #          nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec,
        #          n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init,
        #          ged_mat=ged_mat)
        self._edit_cost_constants, _ = self._update_ecc(nb_cost_mat, dis_k_vec)
        for i in range(len(self._edit_cost_constants)):
            # clamp numerical noise around zero to exactly zero; any
            # genuinely negative cost is an error.
            if -1e-9 <= self._edit_cost_constants[i] <= 1e-9:
                self._edit_cost_constants[i] = 0
            if self._edit_cost_constants[i] < 0:
                raise ValueError('The edit cost is negative.')
        # for i in range(len(self._edit_cost_constants)):
        #     if self._edit_cost_constants[i] < 0:
        #         self._edit_cost_constants[i] = 0
        # compute new GEDs and numbers of edit operations.
        options = self._ged_options.copy() # np.array([self._edit_cost_constants[0], self._edit_cost_constants[1], 0.75])
        options['edit_cost_constants'] = self._edit_cost_constants # @todo
        options['node_labels'] = self._dataset.node_labels
        options['edge_labels'] = self._dataset.edge_labels
        options['node_attrs'] = self._dataset.node_attrs
        options['edge_attrs'] = self._dataset.edge_attrs
        ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self._parallel, verbose=(self._verbose > 1))
        residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
        time_list.append(time.time() - time0)
        edit_cost_list.append(self._edit_cost_constants)
        nb_cost_mat = np.array(n_edit_operations)
        nb_cost_mat_list.append(nb_cost_mat)
        # check convergency: did any cost change by more than the
        # relative threshold `_epsilon_ec`?
        ec_changed = False
        for i, cost in enumerate(self._edit_cost_constants):
            if cost == 0:
                if edit_cost_list[-2][i] > self._epsilon_ec:
                    ec_changed = True
                    break
            elif abs(cost - edit_cost_list[-2][i]) / cost > self._epsilon_ec:
                ec_changed = True
                break
            # if abs(cost - edit_cost_list[-2][i]) > self._epsilon_ec:
            #     ec_changed = True
            #     break
        # did the residual change by more than `_epsilon_residual` (relative)?
        residual_changed = False
        if residual_list[-1] == 0:
            if residual_list[-2] > self._epsilon_residual:
                residual_changed = True
        elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self._epsilon_residual:
            residual_changed = True
        self._converged = not (ec_changed or residual_changed)
        if self._converged:
            itrs_without_update += 1
        else:
            itrs_without_update = 0
            self._num_updates_ecc += 1
        # print current states.
        if self._verbose >= 2:
            print()
            print('-------------------------------------------------------------------------')
            print('States of iteration', self._itrs + 1)
            print('-------------------------------------------------------------------------')
            # print('Time spend:', self._runtime_optimize_ec)
            print('Total number of iterations for optimizing:', self._itrs + 1)
            print('Total number of updating edit costs:', self._num_updates_ecc)
            print('Was optimization of edit costs converged:', self._converged)
            print('Did edit costs change:', ec_changed)
            print('Did residual change:', residual_changed)
            print('Iterations without update:', itrs_without_update)
            print('Current edit cost constants:', self._edit_cost_constants)
            print('Residual list:', residual_list)
            print('-------------------------------------------------------------------------')
        self._itrs += 1
  341. def _termination_criterion_met(self, converged, timer, itr, itrs_without_update):
  342. if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False):
  343. # if self._state == AlgorithmState.TERMINATED:
  344. # self._state = AlgorithmState.INITIALIZED
  345. return True
  346. return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False)
  347. def _update_ecc(self, nb_cost_mat, dis_k_vec, rw_constraints='inequality'):
  348. # if self._ds_name == 'Letter-high':
  349. if self._ged_options['edit_cost'] == 'LETTER':
  350. raise Exception('Cannot compute for cost "LETTER".')
  351. pass
  352. # # method 1: set alpha automatically, just tune c_vir and c_eir by
  353. # # LMS using cvxpy.
  354. # alpha = 0.5
  355. # coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec)
  356. ## if np.count_nonzero(nb_cost_mat[:,4]) == 0:
  357. ## alpha = 0.75
  358. ## else:
  359. ## alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0])
  360. ## alpha = alpha * 0.99
  361. # param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1])
  362. # param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5])
  363. # nb_cost_mat_new = np.column_stack((param_vir, param_eir))
  364. # dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3]
  365. #
  366. # x = cp.Variable(nb_cost_mat_new.shape[1])
  367. # cost = cp.sum_squares(nb_cost_mat_new * x - dis_new)
  368. # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
  369. # prob = cp.Problem(cp.Minimize(cost), constraints)
  370. # prob.solve()
  371. # edit_costs_new = x.value
  372. # edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha])
  373. # residual = np.sqrt(prob.value)
  374. # # method 2: tune c_vir, c_eir and alpha by nonlinear programming by
  375. # # scipy.optimize.minimize.
  376. # w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
  377. # w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
  378. # w2 = nb_cost_mat[:,3]
  379. # w3 = dis_k_vec
  380. # func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
  381. # + w2 * x[2] - w3 * x[3]) ** 2)
  382. # bounds = ((0, None), (0., None), (0.5, 0.5), (0, None))
  383. # res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds)
  384. # edit_costs_new = res.x[0:3]
  385. # residual = res.fun
  386. # method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy.
  387. # # method 4: tune c_vir, c_eir and alpha by QP function
  388. # # scipy.optimize.least_squares. An initial guess is required.
  389. # w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
  390. # w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
  391. # w2 = nb_cost_mat[:,3]
  392. # w3 = dis_k_vec
  393. # func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
  394. # + w2 * x[2] - w3 * x[3]) ** 2
  395. # res = optimize.root(func, [0.9, 1.7, 0.75, 100])
  396. # edit_costs_new = res.x
  397. # residual = None
  398. elif self._ged_options['edit_cost'] == 'LETTER2':
  399. # # 1. if c_vi != c_vr, c_ei != c_er.
  400. # nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  401. # x = cp.Variable(nb_cost_mat_new.shape[1])
  402. # cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  403. ## # 1.1 no constraints.
  404. ## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
  405. # # 1.2 c_vs <= c_vi + c_vr.
  406. # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  407. # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  408. ## # 2. if c_vi == c_vr, c_ei == c_er.
  409. ## nb_cost_mat_new = nb_cost_mat[:,[0,3,4]]
  410. ## nb_cost_mat_new[:,0] += nb_cost_mat[:,1]
  411. ## nb_cost_mat_new[:,2] += nb_cost_mat[:,5]
  412. ## x = cp.Variable(nb_cost_mat_new.shape[1])
  413. ## cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  414. ## # 2.1 no constraints.
  415. ## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
  416. ### # 2.2 c_vs <= c_vi + c_vr.
  417. ### constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  418. ### np.array([2.0, -1.0, 0.0]).T@x >= 0.0]
  419. #
  420. # prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  421. # prob.solve()
  422. # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
  423. # edit_costs_new = np.array(edit_costs_new)
  424. # residual = np.sqrt(prob.value)
  425. if not self._triangle_rule and self._allow_zeros:
  426. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  427. x = cp.Variable(nb_cost_mat_new.shape[1])
  428. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  429. constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  430. np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  431. np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  432. np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
  433. np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01]
  434. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  435. self._execute_cvx(prob)
  436. edit_costs_new = x.value
  437. residual = np.sqrt(prob.value)
  438. elif self._triangle_rule and self._allow_zeros:
  439. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  440. x = cp.Variable(nb_cost_mat_new.shape[1])
  441. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  442. constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  443. np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  444. np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  445. np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
  446. np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01,
  447. np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  448. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  449. self._execute_cvx(prob)
  450. edit_costs_new = x.value
  451. residual = np.sqrt(prob.value)
  452. elif not self._triangle_rule and not self._allow_zeros:
  453. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  454. x = cp.Variable(nb_cost_mat_new.shape[1])
  455. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  456. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
  457. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  458. prob.solve()
  459. edit_costs_new = x.value
  460. residual = np.sqrt(prob.value)
  461. # elif method == 'inequality_modified':
  462. # # c_vs <= c_vi + c_vr.
  463. # nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  464. # x = cp.Variable(nb_cost_mat_new.shape[1])
  465. # cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  466. # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  467. # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  468. # prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  469. # prob.solve()
  470. # # use same costs for insertion and removal rather than the fitted costs.
  471. # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
  472. # edit_costs_new = np.array(edit_costs_new)
  473. # residual = np.sqrt(prob.value)
  474. elif self._triangle_rule and not self._allow_zeros:
  475. # c_vs <= c_vi + c_vr.
  476. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  477. x = cp.Variable(nb_cost_mat_new.shape[1])
  478. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  479. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
  480. np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  481. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  482. self._execute_cvx(prob)
  483. edit_costs_new = x.value
  484. residual = np.sqrt(prob.value)
  485. elif rw_constraints == '2constraints': # @todo: rearrange it later.
  486. # c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er.
  487. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  488. x = cp.Variable(nb_cost_mat_new.shape[1])
  489. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  490. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
  491. np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
  492. np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0,
  493. np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
  494. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  495. prob.solve()
  496. edit_costs_new = x.value
  497. residual = np.sqrt(prob.value)
  498. elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
  499. is_n_attr = np.count_nonzero(nb_cost_mat[:,2])
  500. is_e_attr = np.count_nonzero(nb_cost_mat[:,5])
  501. if self._ds_name == 'SYNTHETICnew': # @todo: rearrenge this later.
  502. # nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
  503. nb_cost_mat_new = nb_cost_mat[:,[2,3,4]]
  504. x = cp.Variable(nb_cost_mat_new.shape[1])
  505. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  506. # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  507. # np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
  508. # constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]]
  509. constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])],
  510. np.array([0.0, 1.0, -1.0]).T@x == 0.0]
  511. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  512. prob.solve()
  513. # print(x.value)
  514. edit_costs_new = np.concatenate((np.array([0.0, 0.0]), x.value,
  515. np.array([0.0])))
  516. residual = np.sqrt(prob.value)
  517. elif not self._triangle_rule and self._allow_zeros:
  518. if is_n_attr and is_e_attr:
  519. nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
  520. x = cp.Variable(nb_cost_mat_new.shape[1])
  521. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  522. constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  523. np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  524. np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  525. np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
  526. np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01]
  527. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  528. self._execute_cvx(prob)
  529. edit_costs_new = x.value
  530. residual = np.sqrt(prob.value)
  531. elif is_n_attr and not is_e_attr:
  532. nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
  533. x = cp.Variable(nb_cost_mat_new.shape[1])
  534. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  535. constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  536. np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  537. np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  538. np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
  539. np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01]
  540. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  541. self._execute_cvx(prob)
  542. edit_costs_new = np.concatenate((x.value, np.array([0.0])))
  543. residual = np.sqrt(prob.value)
  544. elif not is_n_attr and is_e_attr:
  545. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  546. x = cp.Variable(nb_cost_mat_new.shape[1])
  547. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  548. constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  549. np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  550. np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  551. np.array([0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
  552. np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01]
  553. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  554. self._execute_cvx(prob)
  555. edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
  556. residual = np.sqrt(prob.value)
  557. else:
  558. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
  559. x = cp.Variable(nb_cost_mat_new.shape[1])
  560. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  561. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
  562. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  563. self._execute_cvx(prob)
  564. edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
  565. x.value[2:], np.array([0.0])))
  566. residual = np.sqrt(prob.value)
  567. elif self._triangle_rule and self._allow_zeros:
  568. if is_n_attr and is_e_attr:
  569. nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
  570. x = cp.Variable(nb_cost_mat_new.shape[1])
  571. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  572. constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  573. np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  574. np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  575. np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
  576. np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
  577. np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
  578. np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  579. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  580. self._execute_cvx(prob)
  581. edit_costs_new = x.value
  582. residual = np.sqrt(prob.value)
  583. elif is_n_attr and not is_e_attr:
  584. nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
  585. x = cp.Variable(nb_cost_mat_new.shape[1])
  586. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  587. constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  588. np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  589. np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  590. np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
  591. np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01,
  592. np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  593. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  594. self._execute_cvx(prob)
  595. edit_costs_new = np.concatenate((x.value, np.array([0.0])))
  596. residual = np.sqrt(prob.value)
  597. elif not is_n_attr and is_e_attr:
  598. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  599. x = cp.Variable(nb_cost_mat_new.shape[1])
  600. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  601. constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
  602. np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  603. np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  604. np.array([0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
  605. np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
  606. np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  607. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  608. self._execute_cvx(prob)
  609. edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
  610. residual = np.sqrt(prob.value)
  611. else:
  612. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
  613. x = cp.Variable(nb_cost_mat_new.shape[1])
  614. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  615. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
  616. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  617. self._execute_cvx(prob)
  618. edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
  619. x.value[2:], np.array([0.0])))
  620. residual = np.sqrt(prob.value)
  621. elif not self._triangle_rule and not self._allow_zeros:
  622. if is_n_attr and is_e_attr:
  623. nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
  624. x = cp.Variable(nb_cost_mat_new.shape[1])
  625. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  626. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
  627. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  628. self._execute_cvx(prob)
  629. edit_costs_new = x.value
  630. residual = np.sqrt(prob.value)
  631. elif is_n_attr and not is_e_attr:
  632. nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
  633. x = cp.Variable(nb_cost_mat_new.shape[1])
  634. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  635. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
  636. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  637. self._execute_cvx(prob)
  638. edit_costs_new = np.concatenate((x.value, np.array([0.0])))
  639. residual = np.sqrt(prob.value)
  640. elif not is_n_attr and is_e_attr:
  641. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  642. x = cp.Variable(nb_cost_mat_new.shape[1])
  643. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  644. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
  645. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  646. self._execute_cvx(prob)
  647. edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
  648. residual = np.sqrt(prob.value)
  649. else:
  650. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
  651. x = cp.Variable(nb_cost_mat_new.shape[1])
  652. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  653. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
  654. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  655. self._execute_cvx(prob)
  656. edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
  657. x.value[2:], np.array([0.0])))
  658. residual = np.sqrt(prob.value)
  659. elif self._triangle_rule and not self._allow_zeros:
  660. # c_vs <= c_vi + c_vr.
  661. if is_n_attr and is_e_attr:
  662. nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
  663. x = cp.Variable(nb_cost_mat_new.shape[1])
  664. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  665. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
  666. np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
  667. np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  668. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  669. self._execute_cvx(prob)
  670. edit_costs_new = x.value
  671. residual = np.sqrt(prob.value)
  672. elif is_n_attr and not is_e_attr:
  673. nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
  674. x = cp.Variable(nb_cost_mat_new.shape[1])
  675. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  676. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
  677. np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  678. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  679. self._execute_cvx(prob)
  680. edit_costs_new = np.concatenate((x.value, np.array([0.0])))
  681. residual = np.sqrt(prob.value)
  682. elif not is_n_attr and is_e_attr:
  683. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
  684. x = cp.Variable(nb_cost_mat_new.shape[1])
  685. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  686. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
  687. np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  688. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  689. self._execute_cvx(prob)
  690. edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
  691. residual = np.sqrt(prob.value)
  692. else:
  693. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
  694. x = cp.Variable(nb_cost_mat_new.shape[1])
  695. cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
  696. constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
  697. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  698. self._execute_cvx(prob)
  699. edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
  700. x.value[2:], np.array([0.0])))
  701. residual = np.sqrt(prob.value)
  702. elif self._ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled.
  703. if not self._triangle_rule and self._allow_zeros:
  704. x = cp.Variable(nb_cost_mat.shape[1])
  705. cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
  706. constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
  707. np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  708. np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  709. np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
  710. np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01]
  711. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  712. self._execute_cvx(prob)
  713. edit_costs_new = x.value
  714. residual = np.sqrt(prob.value)
  715. elif self._triangle_rule and self._allow_zeros:
  716. x = cp.Variable(nb_cost_mat.shape[1])
  717. cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
  718. constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
  719. np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  720. np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
  721. np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
  722. np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
  723. np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
  724. np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  725. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  726. self._execute_cvx(prob)
  727. edit_costs_new = x.value
  728. residual = np.sqrt(prob.value)
  729. elif not self._triangle_rule and not self._allow_zeros:
  730. x = cp.Variable(nb_cost_mat.shape[1])
  731. cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
  732. constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])]]
  733. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  734. self._execute_cvx(prob)
  735. edit_costs_new = x.value
  736. residual = np.sqrt(prob.value)
  737. elif self._triangle_rule and not self._allow_zeros:
  738. x = cp.Variable(nb_cost_mat.shape[1])
  739. cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
  740. constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])],
  741. np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
  742. np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  743. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  744. self._execute_cvx(prob)
  745. edit_costs_new = x.value
  746. residual = np.sqrt(prob.value)
  747. else:
  748. raise Exception('The edit cost "', self._ged_options['edit_cost'], '" is not supported for update progress.')
  749. # # method 1: simple least square method.
  750. # edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
  751. # rcond=None)
  752. # # method 2: least square method with x_i >= 0.
  753. # edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec)
  754. # method 3: solve as a quadratic program with constraints.
  755. # P = np.dot(nb_cost_mat.T, nb_cost_mat)
  756. # q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)
  757. # G = -1 * np.identity(nb_cost_mat.shape[1])
  758. # h = np.array([0 for i in range(nb_cost_mat.shape[1])])
  759. # A = np.array([1 for i in range(nb_cost_mat.shape[1])])
  760. # b = 1
  761. # x = cp.Variable(nb_cost_mat.shape[1])
  762. # prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
  763. # [G@x <= h])
  764. # prob.solve()
  765. # edit_costs_new = x.value
  766. # residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
  767. # G = -1 * np.identity(nb_cost_mat.shape[1])
  768. # h = np.array([0 for i in range(nb_cost_mat.shape[1])])
  769. x = cp.Variable(nb_cost_mat.shape[1])
  770. cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
  771. constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
  772. # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
  773. np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
  774. np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
  775. prob = cp.Problem(cp.Minimize(cost_fun), constraints)
  776. self._execute_cvx(prob)
  777. edit_costs_new = x.value
  778. residual = np.sqrt(prob.value)
  779. # method 4:
  780. return edit_costs_new, residual
  781. def _execute_cvx(self, prob):
  782. try:
  783. prob.solve(verbose=(self._verbose>=2))
  784. except MemoryError as error0:
  785. if self._verbose >= 2:
  786. print('\nUsing solver "OSQP" caused a memory error.')
  787. print('the original error message is\n', error0)
  788. print('solver status: ', prob.status)
  789. print('trying solver "CVXOPT" instead...\n')
  790. try:
  791. prob.solve(solver=cp.CVXOPT, verbose=(self._verbose>=2))
  792. except Exception as error1:
  793. if self._verbose >= 2:
  794. print('\nAn error occured when using solver "CVXOPT".')
  795. print('the original error message is\n', error1)
  796. print('solver status: ', prob.status)
  797. print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
  798. prob.solve(solver=cp.MOSEK, verbose=(self._verbose>=2))
  799. else:
  800. if self._verbose >= 2:
  801. print('solver status: ', prob.status)
  802. else:
  803. if self._verbose >= 2:
  804. print('solver status: ', prob.status)
  805. if self._verbose >= 2:
  806. print()
def _gmg_bcu(self):
	"""
	The local search algorithm based on block coordinate update (BCU) for
	estimating a generalized median graph (GMG).

	Populates ``self._mge``, ``self._sod_set_median``, ``self._sod_gen_median``,
	``self._set_median`` and ``self._gen_median``.

	Returns
	-------
	None.
	"""
	# Set up the ged environment: register the edit cost, the (cleaned)
	# dataset graphs, and two empty placeholder graphs for the medians.
	ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private varible.
	# gedlibpy.restart_env()
	ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constant=self._edit_cost_constants)
	graphs = [self._clean_graph(g) for g in self._dataset.graphs]
	for g in graphs:
		ged_env.add_nx_graph(g, '')
	graph_ids = ged_env.get_all_graph_ids()
	set_median_id = ged_env.add_graph('set_median')
	gen_median_id = ged_env.add_graph('gen_median')
	ged_env.init(init_option=self._ged_options['init_option'])

	# Set up the median graph estimator.
	self._mge = MedianGraphEstimator(ged_env, constant_node_costs(self._ged_options['edit_cost']))
	self._mge.set_refine_method(self._ged_options['method'], ged_options_to_string(self._ged_options))
	options = self._mge_options.copy()
	if not 'seed' in options:
		# Millisecond timestamp as default seed.
		options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
	options['parallel'] = self._parallel

	# Select the GED algorithm.
	self._mge.set_options(mge_options_to_string(options))
	self._mge.set_label_names(node_labels=self._dataset.node_labels,
				  edge_labels=self._dataset.edge_labels,
				  node_attrs=self._dataset.node_attrs,
				  edge_attrs=self._dataset.edge_attrs)
	ged_options = self._ged_options.copy()
	if self._parallel:
		# One thread per GED computation when the estimator itself runs in
		# parallel — presumably to avoid oversubscription; TODO confirm.
		ged_options['threads'] = 1
	self._mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options))
	self._mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options))

	# Run the estimator.
	self._mge.run(graph_ids, set_median_id, gen_median_id)

	# Get SODs (sums of distances) before and after refinement.
	self._sod_set_median = self._mge.get_sum_of_distances('initialized')
	self._sod_gen_median = self._mge.get_sum_of_distances('converged')

	# Get median graphs back out of the GED environment.
	self._set_median = ged_env.get_nx_graph(set_median_id)
	self._gen_median = ged_env.get_nx_graph(gen_median_id)
  851. def _compute_distances_to_true_median(self):
  852. # compute distance in kernel space for set median.
  853. kernels_to_sm, _ = self._graph_kernel.compute(self._set_median, self._dataset.graphs, **self._kernel_options)
  854. kernel_sm, _ = self._graph_kernel.compute(self._set_median, self._set_median, **self._kernel_options)
  855. if self._kernel_options['normalize']:
  856. kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
  857. kernel_sm = 1
  858. # @todo: not correct kernel value
  859. gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
  860. gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1)
  861. self._k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
  862. [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
  863. gram_with_sm, withterm3=False)
  864. # compute distance in kernel space for generalized median.
  865. kernels_to_gm, _ = self._graph_kernel.compute(self._gen_median, self._dataset.graphs, **self._kernel_options)
  866. kernel_gm, _ = self._graph_kernel.compute(self._gen_median, self._gen_median, **self._kernel_options)
  867. if self._kernel_options['normalize']:
  868. kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
  869. kernel_gm = 1
  870. gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
  871. gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1)
  872. self._k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
  873. [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
  874. gram_with_gm, withterm3=False)
  875. # compute distance in kernel space for each graph in median set.
  876. k_dis_median_set = []
  877. for idx in range(len(self._dataset.graphs)):
  878. k_dis_median_set.append(compute_k_dis(idx+1, range(1, 1+len(self._dataset.graphs)),
  879. [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
  880. gram_with_gm, withterm3=False))
  881. idx_k_dis_median_set_min = np.argmin(k_dis_median_set)
  882. self._k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min]
  883. self._best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy()
  884. if self._verbose >= 2:
  885. print()
  886. print('distance in kernel space for set median:', self._k_dis_set_median)
  887. print('distance in kernel space for generalized median:', self._k_dis_gen_median)
  888. print('minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
  889. print('distance in kernel space for each graph in median set:', k_dis_median_set)
  890. # def _clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
  891. def _clean_graph(self, G): # @todo: this may not be needed when datafile is updated.
  892. """
  893. Cleans node and edge labels and attributes of the given graph.
  894. """
  895. G_new = nx.Graph(**G.graph)
  896. for nd, attrs in G.nodes(data=True):
  897. G_new.add_node(str(nd)) # @todo: should we keep this as str()?
  898. for l_name in self._dataset.node_labels:
  899. G_new.nodes[str(nd)][l_name] = str(attrs[l_name])
  900. for a_name in self._dataset.node_attrs:
  901. G_new.nodes[str(nd)][a_name] = str(attrs[a_name])
  902. for nd1, nd2, attrs in G.edges(data=True):
  903. G_new.add_edge(str(nd1), str(nd2))
  904. for l_name in self._dataset.edge_labels:
  905. G_new.edges[str(nd1), str(nd2)][l_name] = str(attrs[l_name])
  906. for a_name in self._dataset.edge_attrs:
  907. G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name])
  908. return G_new
@property
def mge(self):
	"""The median graph estimator built and run by ``_gmg_bcu`` (read-only)."""
	return self._mge
@property
def ged_options(self):
	"""Options dict for GED computation (keys such as 'edit_cost', 'method')."""
	return self._ged_options

@ged_options.setter
def ged_options(self, value):
	# Replaces the whole options dict; no validation is performed.
	self._ged_options = value
@property
def mge_options(self):
	"""Options dict passed to the median graph estimator in ``_gmg_bcu``."""
	return self._mge_options

@mge_options.setter
def mge_options(self, value):
	# Replaces the whole options dict; no validation is performed.
	self._mge_options = value
@property
def fit_method(self):
	"""Identifier of the edit-cost fitting strategy (consumed elsewhere in the class)."""
	return self._fit_method

@fit_method.setter
def fit_method(self, value):
	# Stored as-is; accepted values are defined where _fit_method is consumed.
	self._fit_method = value
@property
def init_ecc(self):
	"""Initial edit cost constants — presumably seeds the fitting; TODO confirm against constructor."""
	return self._init_ecc

@init_ecc.setter
def init_ecc(self, value):
	# Stored as-is; no validation is performed.
	self._init_ecc = value
@property
def set_median(self):
	"""The set median graph, as retrieved via ``ged_env.get_nx_graph`` in ``_gmg_bcu`` (read-only)."""
	return self._set_median
@property
def gen_median(self):
	"""The generalized median graph, as retrieved via ``ged_env.get_nx_graph`` in ``_gmg_bcu`` (read-only)."""
	return self._gen_median
@property
def best_from_dataset(self):
	"""Copy of the dataset graph closest to the median in kernel space (set in ``_compute_distances_to_true_median``)."""
	return self._best_from_dataset
@property
def gram_matrix_unnorm(self):
	"""Unnormalized Gram matrix; its diagonal is used for kernel normalization in ``_compute_distances_to_true_median``."""
	return self._gram_matrix_unnorm

@gram_matrix_unnorm.setter
def gram_matrix_unnorm(self, value):
	# Stored as-is; callers are responsible for supplying a consistent matrix.
	self._gram_matrix_unnorm = value

A Python package for graph kernels, graph edit distances and the graph pre-image problem.