You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

median_graph_estimator.py 54 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Mon Mar 16 18:04:55 2020
  5. @author: ljia
  6. """
  7. import numpy as np
  8. from gklearn.ged.env import AlgorithmState, NodeMap
  9. from gklearn.ged.util import misc
  10. from gklearn.utils import Timer
  11. import time
  12. from tqdm import tqdm
  13. import sys
  14. import networkx as nx
  15. class MedianGraphEstimator(object): # @todo: differ dummy_node from undifined node?
  16. def __init__(self, ged_env, constant_node_costs):
  17. """Constructor.
  18. Parameters
  19. ----------
  20. ged_env : gklearn.gedlib.gedlibpy.GEDEnv
  21. Initialized GED environment. The edit costs must be set by the user.
  22. constant_node_costs : Boolean
  23. Set to True if the node relabeling costs are constant.
  24. """
  25. self.__ged_env = ged_env
  26. self.__init_method = 'BRANCH_FAST'
  27. self.__init_options = ''
  28. self.__descent_method = 'BRANCH_FAST'
  29. self.__descent_options = ''
  30. self.__refine_method = 'IPFP'
  31. self.__refine_options = ''
  32. self.__constant_node_costs = constant_node_costs
  33. self.__labeled_nodes = (ged_env.get_num_node_labels() > 1)
  34. self.__node_del_cost = ged_env.get_node_del_cost(ged_env.get_node_label(1))
  35. self.__node_ins_cost = ged_env.get_node_ins_cost(ged_env.get_node_label(1))
  36. self.__labeled_edges = (ged_env.get_num_edge_labels() > 1)
  37. self.__edge_del_cost = ged_env.get_edge_del_cost(ged_env.get_edge_label(1))
  38. self.__edge_ins_cost = ged_env.get_edge_ins_cost(ged_env.get_edge_label(1))
  39. self.__init_type = 'RANDOM'
  40. self.__num_random_inits = 10
  41. self.__desired_num_random_inits = 10
  42. self.__use_real_randomness = True
  43. self.__seed = 0
  44. self.__update_order = True
  45. self.__refine = True
  46. self.__time_limit_in_sec = 0
  47. self.__epsilon = 0.0001
  48. self.__max_itrs = 100
  49. self.__max_itrs_without_update = 3
  50. self.__num_inits_increase_order = 10
  51. self.__init_type_increase_order = 'K-MEANS++'
  52. self.__max_itrs_increase_order = 10
  53. self.__print_to_stdout = 2
  54. self.__median_id = np.inf # @todo: check
  55. self.__median_node_id_prefix = '' # @todo: check
  56. self.__node_maps_from_median = {}
  57. self.__sum_of_distances = 0
  58. self.__best_init_sum_of_distances = np.inf
  59. self.__converged_sum_of_distances = np.inf
  60. self.__runtime = None
  61. self.__runtime_initialized = None
  62. self.__runtime_converged = None
  63. self.__itrs = [] # @todo: check: {} ?
  64. self.__num_decrease_order = 0
  65. self.__num_increase_order = 0
  66. self.__num_converged_descents = 0
  67. self.__state = AlgorithmState.TERMINATED
  68. self.__label_names = {}
  69. if ged_env is None:
  70. raise Exception('The GED environment pointer passed to the constructor of MedianGraphEstimator is null.')
  71. elif not ged_env.is_initialized():
  72. raise Exception('The GED environment is uninitialized. Call gedlibpy.GEDEnv.init() before passing it to the constructor of MedianGraphEstimator.')
  def set_options(self, options):
      """Sets the options of the estimator.

      All options are first reset to their defaults, then the given ones are
      applied in the order in which they appear in the parsed options map.

      Parameters
      ----------
      options : string
          String that specifies with which options to run the estimator.
          Recognized options: init-type, random-inits, randomness, stdout,
          update-order, refine, time-limit, max-itrs, max-itrs-without-update,
          seed, epsilon, inits-increase-order, init-type-increase-order,
          max-itrs-increase-order.

      Raises
      ------
      Exception
          If an option name is unknown or an option value is invalid. An
          invalid value is rejected before it is stored.
      """
      def invalid(opt_name, opt_val, usage):
          # Builds the error reported for an unusable option value.
          return Exception('Invalid argument "' + opt_val + '" for option ' + opt_name + '. Usage: options = ' + usage)

      def parse(convert, opt_name, opt_val, usage):
          # Converts opt_val with int/float; only conversion failures are translated.
          try:
              return convert(opt_val)
          except (TypeError, ValueError) as err:
              raise invalid(opt_name, opt_val, usage) from err

      def parse_flag(opt_name, opt_val):
          # Maps the literals TRUE/FALSE to booleans.
          if opt_val == 'TRUE':
              return True
          if opt_val == 'FALSE':
              return False
          raise invalid(opt_name, opt_val, '"[--' + opt_name + ' TRUE|FALSE] [...]"')

      self.__set_default_options()
      options_map = misc.options_string_to_options_map(options)
      for opt_name, opt_val in options_map.items():
          if opt_name == 'init-type':
              if opt_val not in ('MEDOID', 'RANDOM', 'MIN', 'MAX', 'MEAN'):
                  # The usage text lists exactly the values accepted above.
                  raise Exception('Invalid argument ' + opt_val + ' for option init-type. Usage: options = "[--init-type RANDOM|MEDOID|MIN|MAX|MEAN] [...]"')
              self.__init_type = opt_val
          elif opt_name == 'random-inits':
              usage = '"[--random-inits <convertible to int greater 0>]"'
              num_inits = parse(int, opt_name, opt_val, usage)
              if num_inits <= 0:
                  raise invalid(opt_name, opt_val, usage)
              self.__num_random_inits = num_inits
              self.__desired_num_random_inits = num_inits
          elif opt_name == 'randomness':
              if opt_val == 'PSEUDO':
                  self.__use_real_randomness = False
              elif opt_val == 'REAL':
                  self.__use_real_randomness = True
              else:
                  raise invalid(opt_name, opt_val, '"[--randomness REAL|PSEUDO] [...]"')
          elif opt_name == 'stdout':
              if opt_val not in ('0', '1', '2'):
                  raise invalid(opt_name, opt_val, '"[--stdout 0|1|2] [...]"')
              self.__print_to_stdout = int(opt_val)
          elif opt_name == 'update-order':
              self.__update_order = parse_flag(opt_name, opt_val)
          elif opt_name == 'refine':
              self.__refine = parse_flag(opt_name, opt_val)
          elif opt_name == 'time-limit':
              self.__time_limit_in_sec = parse(float, opt_name, opt_val, '"[--time-limit <convertible to double>] [...]')
          elif opt_name == 'max-itrs':
              self.__max_itrs = parse(int, opt_name, opt_val, '"[--max-itrs <convertible to int>] [...]')
          elif opt_name == 'max-itrs-without-update':
              self.__max_itrs_without_update = parse(int, opt_name, opt_val, '"[--max-itrs-without-update <convertible to int>] [...]')
          elif opt_name == 'seed':
              self.__seed = parse(int, opt_name, opt_val, '"[--seed <convertible to int greater equal 0>] [...]')
          elif opt_name == 'epsilon':
              usage = '"[--epsilon <convertible to double greater 0>] [...]'
              epsilon = parse(float, opt_name, opt_val, usage)
              if epsilon <= 0:
                  raise invalid(opt_name, opt_val, usage)
              self.__epsilon = epsilon
          elif opt_name == 'inits-increase-order':
              usage = '"[--inits-increase-order <convertible to int greater 0>]"'
              num_inits = parse(int, opt_name, opt_val, usage)
              if num_inits <= 0:
                  raise invalid(opt_name, opt_val, usage)
              self.__num_inits_increase_order = num_inits
          elif opt_name == 'init-type-increase-order':
              if opt_val not in ('CLUSTERS', 'K-MEANS++'):
                  raise Exception('Invalid argument ' + opt_val + ' for option init-type-increase-order. Usage: options = "[--init-type-increase-order CLUSTERS|K-MEANS++] [...]"')
              self.__init_type_increase_order = opt_val
          elif opt_name == 'max-itrs-increase-order':
              self.__max_itrs_increase_order = parse(int, opt_name, opt_val, '"[--max-itrs-increase-order <convertible to int>] [...]')
          else:
              # List every option handled above so the hint is complete.
              valid_options = '[--init-type <arg>] [--random-inits <arg>] [--randomness <arg>] [--seed <arg>] [--stdout <arg>] '
              valid_options += '[--update-order <arg>] [--refine <arg>] '
              valid_options += '[--time-limit <arg>] [--max-itrs <arg>] [--max-itrs-without-update <arg>] [--epsilon <arg>] '
              valid_options += '[--inits-increase-order <arg>] [--init-type-increase-order <arg>] [--max-itrs-increase-order <arg>]'
              raise Exception('Invalid option "' + opt_name + '". Usage: options = "' + valid_options + '"')
  173. def set_init_method(self, init_method, init_options=''):
  174. """Selects method to be used for computing the initial medoid graph.
  175. Parameters
  176. ----------
  177. init_method : string
  178. The selected method. Default: ged::Options::GEDMethod::BRANCH_UNIFORM.
  179. init_options : string
  180. The options for the selected method. Default: "".
  181. Notes
  182. -----
  183. Has no effect unless "--init-type MEDOID" is passed to set_options().
  184. """
  185. self.__init_method = init_method;
  186. self.__init_options = init_options;
  187. def set_descent_method(self, descent_method, descent_options=''):
  188. """Selects method to be used for block gradient descent..
  189. Parameters
  190. ----------
  191. descent_method : string
  192. The selected method. Default: ged::Options::GEDMethod::BRANCH_FAST.
  193. descent_options : string
  194. The options for the selected method. Default: "".
  195. Notes
  196. -----
  197. Has no effect unless "--init-type MEDOID" is passed to set_options().
  198. """
  199. self.__descent_method = descent_method;
  200. self.__descent_options = descent_options;
  201. def set_refine_method(self, refine_method, refine_options):
  202. """Selects method to be used for improving the sum of distances and the node maps for the converged median.
  203. Parameters
  204. ----------
  205. refine_method : string
  206. The selected method. Default: "IPFP".
  207. refine_options : string
  208. The options for the selected method. Default: "".
  209. Notes
  210. -----
  211. Has no effect if "--refine FALSE" is passed to set_options().
  212. """
  213. self.__refine_method = refine_method
  214. self.__refine_options = refine_options
  215. def run(self, graph_ids, set_median_id, gen_median_id):
  216. """Computes a generalized median graph.
  217. Parameters
  218. ----------
  219. graph_ids : list[integer]
  220. The IDs of the graphs for which the median should be computed. Must have been added to the environment passed to the constructor.
  221. set_median_id : integer
  222. The ID of the computed set-median. A dummy graph with this ID must have been added to the environment passed to the constructor. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph().
  223. gen_median_id : integer
  224. The ID of the computed generalized median. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph().
  225. """
  226. # Sanity checks.
  227. if len(graph_ids) == 0:
  228. raise Exception('Empty vector of graph IDs, unable to compute median.')
  229. all_graphs_empty = True
  230. for graph_id in graph_ids:
  231. if self.__ged_env.get_graph_num_nodes(graph_id) > 0:
  232. self.__median_node_id_prefix = self.__ged_env.get_original_node_ids(graph_id)[0]
  233. all_graphs_empty = False
  234. break
  235. if all_graphs_empty:
  236. raise Exception('All graphs in the collection are empty.')
  237. # Start timer and record start time.
  238. start = time.time()
  239. timer = Timer(self.__time_limit_in_sec)
  240. self.__median_id = gen_median_id
  241. self.__state = AlgorithmState.TERMINATED
  242. # Get ExchangeGraph representations of the input graphs.
  243. graphs = {}
  244. for graph_id in graph_ids:
  245. # @todo: get_nx_graph() function may need to be modified according to the coming code.
  246. graphs[graph_id] = self.__ged_env.get_nx_graph(graph_id, True, True, False)
  247. # print(self.__ged_env.get_graph_internal_id(0))
  248. # print(graphs[0].graph)
  249. # print(graphs[0].nodes(data=True))
  250. # print(graphs[0].edges(data=True))
  251. # print(nx.adjacency_matrix(graphs[0]))
  252. # Construct initial medians.
  253. medians = []
  254. self.__construct_initial_medians(graph_ids, timer, medians)
  255. end_init = time.time()
  256. self.__runtime_initialized = end_init - start
  257. # print(medians[0].graph)
  258. # print(medians[0].nodes(data=True))
  259. # print(medians[0].edges(data=True))
  260. # print(nx.adjacency_matrix(medians[0]))
  261. # Reset information about iterations and number of times the median decreases and increases.
  262. self.__itrs = [0] * len(medians)
  263. self.__num_decrease_order = 0
  264. self.__num_increase_order = 0
  265. self.__num_converged_descents = 0
  266. # Initialize the best median.
  267. best_sum_of_distances = np.inf
  268. self.__best_init_sum_of_distances = np.inf
  269. node_maps_from_best_median = {}
  270. # Run block gradient descent from all initial medians.
  271. self.__ged_env.set_method(self.__descent_method, self.__descent_options)
  272. for median_pos in range(0, len(medians)):
  273. # Terminate if the timer has expired and at least one SOD has been computed.
  274. if timer.expired() and median_pos > 0:
  275. break
  276. # Print information about current iteration.
  277. if self.__print_to_stdout == 2:
  278. print('\n===========================================================')
  279. print('Block gradient descent for initial median', str(median_pos + 1), 'of', str(len(medians)), '.')
  280. print('-----------------------------------------------------------')
  281. # Get reference to the median.
  282. median = medians[median_pos]
  283. # Load initial median into the environment.
  284. self.__ged_env.load_nx_graph(median, gen_median_id)
  285. self.__ged_env.init(self.__ged_env.get_init_type())
  286. # Print information about current iteration.
  287. if self.__print_to_stdout == 2:
  288. progress = tqdm(desc='Computing initial node maps', total=len(graph_ids), file=sys.stdout)
  289. # Compute node maps and sum of distances for initial median.
  290. self.__sum_of_distances = 0
  291. self.__node_maps_from_median.clear()
  292. for graph_id in graph_ids:
  293. self.__ged_env.run_method(gen_median_id, graph_id)
  294. self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id)
  295. # print(self.__node_maps_from_median[graph_id])
  296. self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost()
  297. # print(self.__sum_of_distances)
  298. # Print information about current iteration.
  299. if self.__print_to_stdout == 2:
  300. progress.update(1)
  301. self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances)
  302. self.__ged_env.load_nx_graph(median, set_median_id)
  303. # print(self.__best_init_sum_of_distances)
  304. # Print information about current iteration.
  305. if self.__print_to_stdout == 2:
  306. print('\n')
  307. # Run block gradient descent from initial median.
  308. converged = False
  309. itrs_without_update = 0
  310. while not self.__termination_criterion_met(converged, timer, self.__itrs[median_pos], itrs_without_update):
  311. # Print information about current iteration.
  312. if self.__print_to_stdout == 2:
  313. print('\n===========================================================')
  314. print('Iteration', str(self.__itrs[median_pos] + 1), 'for initial median', str(median_pos + 1), 'of', str(len(medians)), '.')
  315. print('-----------------------------------------------------------')
  316. # Initialize flags that tell us what happened in the iteration.
  317. median_modified = False
  318. node_maps_modified = False
  319. decreased_order = False
  320. increased_order = False
  321. # Update the median.
  322. median_modified = self.__update_median(graphs, median)
  323. if self.__update_order:
  324. if not median_modified or self.__itrs[median_pos] == 0:
  325. decreased_order = self.__decrease_order(graphs, median)
  326. if not decreased_order or self.__itrs[median_pos] == 0:
  327. increased_order = self.__increase_order(graphs, median)
  328. # Update the number of iterations without update of the median.
  329. if median_modified or decreased_order or increased_order:
  330. itrs_without_update = 0
  331. else:
  332. itrs_without_update += 1
  333. # Print information about current iteration.
  334. if self.__print_to_stdout == 2:
  335. print('Loading median to environment: ... ', end='')
  336. # Load the median into the environment.
  337. # @todo: should this function use the original node label?
  338. self.__ged_env.load_nx_graph(median, gen_median_id)
  339. self.__ged_env.init(self.__ged_env.get_init_type())
  340. # Print information about current iteration.
  341. if self.__print_to_stdout == 2:
  342. print('done.')
  343. # Print information about current iteration.
  344. if self.__print_to_stdout == 2:
  345. print('Updating induced costs: ... ', end='')
  346. # Compute induced costs of the old node maps w.r.t. the updated median.
  347. for graph_id in graph_ids:
  348. # print(self.__node_maps_from_median[graph_id].induced_cost())
  349. # xxx = self.__node_maps_from_median[graph_id]
  350. self.__ged_env.compute_induced_cost(gen_median_id, graph_id, self.__node_maps_from_median[graph_id])
  351. # print('---------------------------------------')
  352. # print(self.__node_maps_from_median[graph_id].induced_cost())
  353. # @todo:!!!!!!!!!!!!!!!!!!!!!!!!!!!!This value is a slight different from the c++ program, which might be a bug! Use it very carefully!
  354. # Print information about current iteration.
  355. if self.__print_to_stdout == 2:
  356. print('done.')
  357. # Update the node maps.
  358. node_maps_modified = self.__update_node_maps()
  359. # Update the order of the median if no improvement can be found with the current order.
  360. # Update the sum of distances.
  361. old_sum_of_distances = self.__sum_of_distances
  362. self.__sum_of_distances = 0
  363. for graph_id, node_map in self.__node_maps_from_median.items():
  364. self.__sum_of_distances += node_map.induced_cost()
  365. # print(self.__sum_of_distances)
  366. # Print information about current iteration.
  367. if self.__print_to_stdout == 2:
  368. print('Old local SOD: ', old_sum_of_distances)
  369. print('New local SOD: ', self.__sum_of_distances)
  370. print('Best converged SOD: ', best_sum_of_distances)
  371. print('Modified median: ', median_modified)
  372. print('Modified node maps: ', node_maps_modified)
  373. print('Decreased order: ', decreased_order)
  374. print('Increased order: ', increased_order)
  375. print('===========================================================\n')
  376. converged = not (median_modified or node_maps_modified or decreased_order or increased_order)
  377. self.__itrs[median_pos] += 1
  378. # Update the best median.
  379. if self.__sum_of_distances < best_sum_of_distances:
  380. best_sum_of_distances = self.__sum_of_distances
  381. node_maps_from_best_median = self.__node_maps_from_median.copy() # @todo: this is a shallow copy, not sure if it is enough.
  382. best_median = median
  383. # Update the number of converged descents.
  384. if converged:
  385. self.__num_converged_descents += 1
  386. # Store the best encountered median.
  387. self.__sum_of_distances = best_sum_of_distances
  388. self.__node_maps_from_median = node_maps_from_best_median
  389. self.__ged_env.load_nx_graph(best_median, gen_median_id)
  390. self.__ged_env.init(self.__ged_env.get_init_type())
  391. end_descent = time.time()
  392. self.__runtime_converged = end_descent - start
  393. # Refine the sum of distances and the node maps for the converged median.
  394. self.__converged_sum_of_distances = self.__sum_of_distances
  395. if self.__refine:
  396. self.__improve_sum_of_distances(timer)
  397. # Record end time, set runtime and reset the number of initial medians.
  398. end = time.time()
  399. self.__runtime = end - start
  400. self.__num_random_inits = self.__desired_num_random_inits
  401. # Print global information.
  402. if self.__print_to_stdout != 0:
  403. print('\n===========================================================')
  404. print('Finished computation of generalized median graph.')
  405. print('-----------------------------------------------------------')
  406. print('Best SOD after initialization: ', self.__best_init_sum_of_distances)
  407. print('Converged SOD: ', self.__converged_sum_of_distances)
  408. if self.__refine:
  409. print('Refined SOD: ', self.__sum_of_distances)
  410. print('Overall runtime: ', self.__runtime)
  411. print('Runtime of initialization: ', self.__runtime_initialized)
  412. print('Runtime of block gradient descent: ', self.__runtime_converged - self.__runtime_initialized)
  413. if self.__refine:
  414. print('Runtime of refinement: ', self.__runtime - self.__runtime_converged)
  415. print('Number of initial medians: ', len(medians))
  416. total_itr = 0
  417. num_started_descents = 0
  418. for itr in self.__itrs:
  419. total_itr += itr
  420. if itr > 0:
  421. num_started_descents += 1
  422. print('Size of graph collection: ', len(graph_ids))
  423. print('Number of started descents: ', num_started_descents)
  424. print('Number of converged descents: ', self.__num_converged_descents)
  425. print('Overall number of iterations: ', total_itr)
  426. print('Overall number of times the order decreased: ', self.__num_decrease_order)
  427. print('Overall number of times the order increased: ', self.__num_increase_order)
  428. print('===========================================================\n')
  429. def __improve_sum_of_distances(self, timer): # @todo: go through and test
  430. # Use method selected for refinement phase.
  431. self.__ged_env.set_method(self.__refine_method, self.__refine_options)
  432. # Print information about current iteration.
  433. if self.__print_to_stdout == 2:
  434. progress = tqdm(desc='Improving node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
  435. print('\n===========================================================')
  436. print('Improving node maps and SOD for converged median.')
  437. print('-----------------------------------------------------------')
  438. progress.update(1)
  439. # Improving the node maps.
  440. for graph_id, node_map in self.__node_maps_from_median.items():
  441. if time.expired():
  442. if self.__state == AlgorithmState.TERMINATED:
  443. self.__state = AlgorithmState.CONVERGED
  444. break
  445. self.__ged_env.run_method(self.__gen_median_id, graph_id)
  446. if self.__ged_env.get_upper_bound(self.__gen_median_id, graph_id) < node_map.induced_cost():
  447. self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__gen_median_id, graph_id)
  448. self.__sum_of_distances += self.__node_maps_from_median[graph_id].induced_cost()
  449. # Print information.
  450. if self.__print_to_stdout == 2:
  451. progress.update(1)
  452. self.__sum_of_distances = 0.0
  453. for key, val in self.__node_maps_from_median.items():
  454. self.__sum_of_distances += val.induced_cost()
  455. # Print information.
  456. if self.__print_to_stdout == 2:
  457. print('===========================================================\n')
  458. def __median_available(self):
  459. return self.__gen_median_id != np.inf
  460. def get_state(self):
  461. if not self.__median_available():
  462. raise Exception('No median has been computed. Call run() before calling get_state().')
  463. return self.__state
  464. def get_sum_of_distances(self, state=''):
  465. """Returns the sum of distances.
  466. Parameters
  467. ----------
  468. state : string
  469. The state of the estimator. Can be 'initialized' or 'converged'. Default: ""
  470. Returns
  471. -------
  472. float
  473. The sum of distances (SOD) of the median when the estimator was in the state `state` during the last call to run(). If `state` is not given, the converged SOD (without refinement) or refined SOD (with refinement) is returned.
  474. """
  475. if not self.__median_available():
  476. raise Exception('No median has been computed. Call run() before calling get_sum_of_distances().')
  477. if state == 'initialized':
  478. return self.__best_init_sum_of_distances
  479. if state == 'converged':
  480. return self.__converged_sum_of_distances
  481. return self.__sum_of_distances
  482. def get_runtime(self, state):
  483. if not self.__median_available():
  484. raise Exception('No median has been computed. Call run() before calling get_runtime().')
  485. if state == AlgorithmState.INITIALIZED:
  486. return self.__runtime_initialized
  487. if state == AlgorithmState.CONVERGED:
  488. return self.__runtime_converged
  489. return self.__runtime
  490. def get_num_itrs(self):
  491. if not self.__median_available():
  492. raise Exception('No median has been computed. Call run() before calling get_num_itrs().')
  493. return self.__itrs
  494. def get_num_times_order_decreased(self):
  495. if not self.__median_available():
  496. raise Exception('No median has been computed. Call run() before calling get_num_times_order_decreased().')
  497. return self.__num_decrease_order
  498. def get_num_times_order_increased(self):
  499. if not self.__median_available():
  500. raise Exception('No median has been computed. Call run() before calling get_num_times_order_increased().')
  501. return self.__num_increase_order
  502. def get_num_converged_descents(self):
  503. if not self.__median_available():
  504. raise Exception('No median has been computed. Call run() before calling get_num_converged_descents().')
  505. return self.__num_converged_descents
  506. def get_ged_env(self):
  507. return self.__ged_env
  508. def __set_default_options(self):
  509. self.__init_type = 'RANDOM'
  510. self.__num_random_inits = 10
  511. self.__desired_num_random_inits = 10
  512. self.__use_real_randomness = True
  513. self.__seed = 0
  514. self.__update_order = True
  515. self.__refine = True
  516. self.__time_limit_in_sec = 0
  517. self.__epsilon = 0.0001
  518. self.__max_itrs = 100
  519. self.__max_itrs_without_update = 3
  520. self.__num_inits_increase_order = 10
  521. self.__init_type_increase_order = 'K-MEANS++'
  522. self.__max_itrs_increase_order = 10
  523. self.__print_to_stdout = 2
  524. self.__label_names = {}
  525. def __construct_initial_medians(self, graph_ids, timer, initial_medians):
  526. # Print information about current iteration.
  527. if self.__print_to_stdout == 2:
  528. print('\n===========================================================')
  529. print('Constructing initial median(s).')
  530. print('-----------------------------------------------------------')
  531. # Compute or sample the initial median(s).
  532. initial_medians.clear()
  533. if self.__init_type == 'MEDOID':
  534. self.__compute_medoid(graph_ids, timer, initial_medians)
  535. elif self.__init_type == 'MAX':
  536. pass # @todo
  537. # compute_max_order_graph_(graph_ids, initial_medians)
  538. elif self.__init_type == 'MIN':
  539. pass # @todo
  540. # compute_min_order_graph_(graph_ids, initial_medians)
  541. elif self.__init_type == 'MEAN':
  542. pass # @todo
  543. # compute_mean_order_graph_(graph_ids, initial_medians)
  544. else:
  545. pass # @todo
  546. # sample_initial_medians_(graph_ids, initial_medians)
  547. # Print information about current iteration.
  548. if self.__print_to_stdout == 2:
  549. print('===========================================================')
  550. def __compute_medoid(self, graph_ids, timer, initial_medians):
  551. # Use method selected for initialization phase.
  552. self.__ged_env.set_method(self.__init_method, self.__init_options)
  553. # Print information about current iteration.
  554. if self.__print_to_stdout == 2:
  555. progress = tqdm(desc='Computing medoid', total=len(graph_ids), file=sys.stdout)
  556. # Compute the medoid.
  557. medoid_id = graph_ids[0]
  558. best_sum_of_distances = np.inf
  559. for g_id in graph_ids:
  560. if timer.expired():
  561. self.__state = AlgorithmState.CALLED
  562. break
  563. sum_of_distances = 0
  564. for h_id in graph_ids:
  565. self.__ged_env.run_method(g_id, h_id)
  566. sum_of_distances += self.__ged_env.get_upper_bound(g_id, h_id)
  567. if sum_of_distances < best_sum_of_distances:
  568. best_sum_of_distances = sum_of_distances
  569. medoid_id = g_id
  570. # Print information about current iteration.
  571. if self.__print_to_stdout == 2:
  572. progress.update(1)
  573. initial_medians.append(self.__ged_env.get_nx_graph(medoid_id, True, True, False)) # @todo
  574. # Print information about current iteration.
  575. if self.__print_to_stdout == 2:
  576. print('\n')
  577. def __termination_criterion_met(self, converged, timer, itr, itrs_without_update):
  578. if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
  579. if self.__state == AlgorithmState.TERMINATED:
  580. self.__state = AlgorithmState.INITIALIZED
  581. return True
  582. return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)
  583. def __update_median(self, graphs, median):
  584. # Print information about current iteration.
  585. if self.__print_to_stdout == 2:
  586. print('Updating median: ', end='')
  587. # Store copy of the old median.
  588. old_median = median.copy() # @todo: this is just a shallow copy.
  589. # Update the node labels.
  590. if self.__labeled_nodes:
  591. self.__update_node_labels(graphs, median)
  592. # Update the edges and their labels.
  593. self.__update_edges(graphs, median)
  594. # Print information about current iteration.
  595. if self.__print_to_stdout == 2:
  596. print('done.')
  597. return not self.__are_graphs_equal(median, old_median)
  def __update_node_labels(self, graphs, median):
      """
      Replace node labels of `median` by the median label of their images.

      For every median node ``i`` the labels of the nodes that ``i`` is mapped
      to by ``self.__node_maps_from_median`` are collected (images equal to
      ``np.inf`` are skipped). If the median of these labels differs from the
      current label by more than ``self.__epsilon`` in relabeling cost, the
      node attributes are updated.

      Parameters
      ----------
      graphs : dict
          Maps graph IDs to NetworkX graphs.
      median : NetworkX graph object
          The median graph; modified in place.
      """
      if self.__print_to_stdout == 2:
          print('nodes ... ', end='')

      for node_id in range(nx.number_of_nodes(median)):
          # Collect the labels of the substituted nodes.
          substituted_labels = []
          for graph_id, graph in graphs.items():
              image = self.__node_maps_from_median[graph_id].image(node_id)
              if image != np.inf:
                  substituted_labels.append(graph.nodes[image])

          # Nothing to aggregate if the node is deleted in every graph.
          if not substituted_labels:
              continue

          # Compute the median label and update the median.
          candidate = self.__get_median_node_label(substituted_labels)
          if self.__ged_env.get_node_rel_cost(median.nodes[node_id], candidate) > self.__epsilon:
              nx.set_node_attributes(median, {node_id: candidate})
  def __update_edges(self, graphs, median):
      """
      Recompute the presence and label of every possible edge of `median`.

      For each node pair ``(i, j)`` with ``i < j`` the labels of the edges
      that the pair is mapped onto in `graphs` are collected. The edge is kept
      or inserted, with a possibly updated label, if the summed relabeling
      cost is below
      ``(edge_ins_cost + edge_del_cost) * #mapped_edges - edge_del_cost * #graphs``;
      otherwise it is removed.

      Parameters
      ----------
      graphs : dict
          Maps graph IDs to NetworkX graphs.
      median : NetworkX graph object
          The median graph; modified in place.
      """
      if self.__print_to_stdout == 2:
          print('edges ... ', end='')

      # @todo: what if edge is not labeled?
      num_nodes = nx.number_of_nodes(median)
      for i in range(num_nodes):
          for j in range(i + 1, num_nodes):
              # Collect the labels of the edges to which (i,j) is mapped by the node maps.
              mapped_labels = []
              for graph_id, graph in graphs.items():
                  k = self.__node_maps_from_median[graph_id].image(i)
                  l = self.__node_maps_from_median[graph_id].image(j)
                  if k != np.inf and l != np.inf and graph.has_edge(k, l):
                      mapped_labels.append(graph.edges[(k, l)])

              # Start from the current label of (i,j) or, if absent, a label from the environment.
              median_label = self.__ged_env.get_edge_label(1)
              edge_exists = median.has_edge(i, j)
              if edge_exists:
                  median_label = median.edges[(i, j)]

              # Compute the median edge label and the overall edge relabeling cost.
              rel_cost = 0
              if self.__labeled_edges and len(mapped_labels) > 0:
                  candidate = self.__get_median_edge_label(mapped_labels)
                  if self.__ged_env.get_edge_rel_cost(median_label, candidate) > self.__epsilon:
                      median_label = candidate
                  for mapped_label in mapped_labels:
                      rel_cost += self.__ged_env.get_edge_rel_cost(median_label, mapped_label)

              # Update the median: drop the old edge, re-add it only if that is cheaper.
              if edge_exists:
                  median.remove_edge(i, j)
              threshold = (self.__edge_ins_cost + self.__edge_del_cost) * len(mapped_labels) - self.__edge_del_cost * len(graphs)
              if rel_cost < threshold:
                  median.add_edge(i, j, **median_label)
  659. def __update_node_maps(self):
  660. # Print information about current iteration.
  661. if self.__print_to_stdout == 2:
  662. progress = tqdm(desc='Updating node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
  663. # Update the node maps.
  664. node_maps_were_modified = False
  665. for graph_id, node_map in self.__node_maps_from_median.items():
  666. self.__ged_env.run_method(self.__median_id, graph_id)
  667. if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < node_map.induced_cost() - self.__epsilon:
  668. # xxx = self.__node_maps_from_median[graph_id]
  669. self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id)
  670. # yyy = self.__node_maps_from_median[graph_id]
  671. node_maps_were_modified = True
  672. # Print information about current iteration.
  673. if self.__print_to_stdout == 2:
  674. progress.update(1)
  675. # Print information about current iteration.
  676. if self.__print_to_stdout == 2:
  677. print('\n')
  678. # Return true if the node maps were modified.
  679. return node_maps_were_modified
  680. def __decrease_order(self, graphs, median):
  681. # Print information about current iteration
  682. if self.__print_to_stdout == 2:
  683. print('Trying to decrease order: ... ', end='')
  684. # Initialize ID of the node that is to be deleted.
  685. id_deleted_node = [None] # @todo: or np.inf
  686. decreased_order = False
  687. # Decrease the order as long as the best deletion delta is negative.
  688. while self.__compute_best_deletion_delta(graphs, median, id_deleted_node) < -self.__epsilon:
  689. decreased_order = True
  690. median = self.__delete_node_from_median(id_deleted_node[0], median)
  691. # Print information about current iteration.
  692. if self.__print_to_stdout == 2:
  693. print('done.')
  694. # Return true iff the order was decreased.
  695. return decreased_order
  def __compute_best_deletion_delta(self, graphs, median, id_deleted_node):
      """
      Find the median node whose deletion gives the smallest cost delta.

      Parameters
      ----------
      graphs : dict
          Maps graph IDs to NetworkX graphs.
      median : NetworkX graph object
          The median graph.
      id_deleted_node : list
          One-element output list. ``id_deleted_node[0]`` is set to the ID of
          the best node whenever a delta below the current best is found.

      Returns
      -------
      float
          The best delta; 0.0 if no deletion improves by more than
          ``self.__epsilon``.
      """
      best_delta = 0.0

      for node_id in range(nx.number_of_nodes(median)):
          delta = 0.0
          for graph_id, graph in graphs.items():
              node_map = self.__node_maps_from_median[graph_id]
              k = node_map.image(node_id)
              node_unmapped = (k == np.inf)

              # Node contribution.
              if node_unmapped:
                  delta -= self.__node_del_cost
              else:
                  delta += self.__node_ins_cost - self.__ged_env.get_node_rel_cost(median.nodes[node_id], graph.nodes[k])

              # Contribution of the edges incident to the node.
              for neighbor, edge_label in median[node_id].items():
                  l = node_map.image(neighbor)
                  if node_unmapped or l == np.inf or not graph.has_edge(k, l):
                      delta -= self.__edge_del_cost
                  else:
                      delta += self.__edge_ins_cost - self.__ged_env.get_edge_rel_cost(edge_label, graph.edges[(k, l)])

          # Update best deletion delta.
          if delta < best_delta - self.__epsilon:
              best_delta = delta
              id_deleted_node[0] = node_id

      return best_delta
  def __delete_node_from_median(self, id_deleted_node, median):
      """
      Remove a node from `median` and re-index the median and the node maps.

      The node is removed and all nodes with a larger ID are shifted down by
      one, **in place**, so that every holder of the `median` object sees the
      contiguous IDs ``0..n-2`` that the rebuilt node maps refer to. (The
      previous implementation created a relabelled copy, which left the
      caller's graph object with a gap in its node IDs.)

      Parameters
      ----------
      id_deleted_node : int
          ID of the node to delete. Node IDs of `median` are assumed to be
          ``0..n-1``, as everywhere else in this class.
      median : NetworkX graph object
          The median graph; modified in place.

      Returns
      -------
      NetworkX graph object
          The same `median` object, returned for backward compatibility.
      """
      num_nodes_before = nx.number_of_nodes(median)

      # Update the median.
      median.remove_node(id_deleted_node)
      shift_down = {i: i - 1 for i in range(id_deleted_node + 1, num_nodes_before)}
      if shift_down:
          # The mapping is a chain (i -> i - 1) without cycles, so it can be applied in place.
          nx.relabel_nodes(median, shift_down, copy=False)

      # Update the node maps.
      for key, node_map in self.__node_maps_from_median.items():
          new_node_map = NodeMap(nx.number_of_nodes(median), node_map.num_target_nodes())
          is_unassigned_target_node = [True] * node_map.num_target_nodes()
          for i in range(num_nodes_before):
              if i == id_deleted_node:
                  continue
              new_i = i if i < id_deleted_node else i - 1
              k = node_map.image(i)
              new_node_map.add_assignment(new_i, k)
              if k != np.inf:
                  is_unassigned_target_node[k] = False
          # Target nodes that lost their pre-image are marked as inserted.
          for k, unassigned in enumerate(is_unassigned_target_node):
              if unassigned:
                  new_node_map.add_assignment(np.inf, k)
          self.__node_maps_from_median[key] = new_node_map

      # Increase overall number of decreases.
      self.__num_decrease_order += 1

      return median
  745. def __increase_order(self, graphs, median):
  746. # Print information about current iteration.
  747. if self.__print_to_stdout == 2:
  748. print('Trying to increase order: ... ', end='')
  749. # Initialize the best configuration and the best label of the node that is to be inserted.
  750. best_config = {}
  751. best_label = self.__ged_env.get_node_label(1)
  752. increased_order = False
  753. # Increase the order as long as the best insertion delta is negative.
  754. while self.__compute_best_insertion_delta(graphs, best_config, best_label) < - self.__epsilon: # @todo
  755. increased_order = True
  756. self.__add_node_to_median(best_config, best_label, median)
  757. # Print information about current iteration.
  758. if self.__print_to_stdout == 2:
  759. print('done.')
  760. # Return true iff the order was increased.
  761. return increased_order
  def __compute_best_insertion_delta(self, graphs, best_config, best_label):
      """
      Compute the best delta obtainable by inserting one node into the median.

      Parameters
      ----------
      graphs : dict
          Maps graph IDs to NetworkX graphs.
      best_config : dict
          Output. Every graph ID is first mapped to ``np.inf``; the selected
          helper then fills in the final configuration.
      best_label : dict
          Output container for the label of the node to insert; passed on to
          the selected helper.

      Returns
      -------
      float
          The best insertion delta; 0.0 if no graph has a node without
          pre-image in the median.
      """
      # Construct sets of inserted nodes, i.e. nodes without pre-image in the median.
      inserted_nodes = {}
      for graph_id, graph in graphs.items():
          best_config[graph_id] = np.inf
          inserted_nodes[graph_id] = [
              (k, tuple(graph.nodes[k].items())) # @todo: can order of label names be guaranteed?
              for k in range(nx.number_of_nodes(graph))
              if self.__node_maps_from_median[graph_id].pre_image(k) == np.inf
          ]

      # Return 0.0 if no node is inserted in any of the graphs.
      if not any(inserted_nodes.values()):
          return 0.0

      # Compute insertion configuration, label, and delta.
      label_names = self.__label_names
      if len(label_names['node_labels']) == 0 and len(label_names['node_attrs']) == 0: # @todo
          return self.__compute_insertion_delta_unlabeled(inserted_nodes, best_config, best_label)
      if self.__constant_node_costs:
          return self.__compute_insertion_delta_constant(inserted_nodes, best_config, best_label)
      return self.__compute_insertion_delta_generic(inserted_nodes, best_config, best_label)
  786. def __compute_insertion_delta_unlabeled(self, inserted_nodes, best_config, best_label):
  787. # Construct the nest configuration and compute its insertion delta.
  788. best_delta = 0.0
  789. best_config.clear()
  790. for graph_id, node_set in inserted_nodes.items():
  791. if len(node_set) == 0:
  792. best_config[graph_id] = np.inf
  793. best_delta += self.__node_del_cost
  794. else:
  795. best_config[graph_id] = node_set[0][0]
  796. best_delta -= self.__node_ins_cost
  797. # Return the best insertion delta.
  798. return best_delta
  799. def __compute_insertion_delta_constant(self, inserted_nodes, best_config, best_label):
  800. # Construct histogram and inverse label maps.
  801. hist = {}
  802. inverse_label_maps = {}
  803. for graph_id, node_set in inserted_nodes.items():
  804. inverse_label_maps[graph_id] = {}
  805. for node in node_set:
  806. k = node[0]
  807. label = node[1]
  808. if label not in inverse_label_maps[graph_id]:
  809. inverse_label_maps[graph_id][label] = k
  810. if label not in hist:
  811. hist[label] = 1
  812. else:
  813. hist[label] += 1
  814. # Determine the best label.
  815. best_count = 0
  816. for key, val in hist.items():
  817. if val > best_count:
  818. best_count = val
  819. best_label_tuple = key
  820. # get best label.
  821. best_label.clear()
  822. for key, val in best_label_tuple:
  823. best_label[key] = val
  824. # Construct the best configuration and compute its insertion delta.
  825. best_config.clear()
  826. best_delta = 0.0
  827. node_rel_cost = self.__ged_env.get_node_rel_cost(self.__ged_env.get_node_label(1), self.__ged_env.get_node_label(2))
  828. triangle_ineq_holds = (node_rel_cost <= self.__node_del_cost + self.__node_ins_cost)
  829. for graph_id, _ in inserted_nodes.items():
  830. if best_label_tuple in inverse_label_maps[graph_id]:
  831. best_config[graph_id] = inverse_label_maps[graph_id][best_label_tuple]
  832. best_delta -= self.__node_ins_cost
  833. elif triangle_ineq_holds and not len(inserted_nodes[graph_id]) == 0:
  834. best_config[graph_id] = inserted_nodes[graph_id][0][0]
  835. best_delta += node_rel_cost - self.__node_ins_cost
  836. else:
  837. best_config[graph_id] = np.inf
  838. best_delta += self.__node_del_cost
  839. # Return the best insertion delta.
  840. return best_delta
  841. def __compute_insertion_delta_generic(self, inserted_nodes, best_config, best_label):
  842. # Collect all node labels of inserted nodes.
  843. node_labels = []
  844. for _, node_set in inserted_nodes.items():
  845. for node in node_set:
  846. node_labels.append(node[1])
  847. # Compute node label medians that serve as initial solutions for block gradient descent.
  848. initial_node_labels = []
  849. self.__compute_initial_node_labels(node_labels, initial_node_labels)
  850. # Determine best insertion configuration, label, and delta via parallel block gradient descent from all initial node labels.
  851. best_delta = 0.0
  852. for node_label in initial_node_labels:
  853. # Construct local configuration.
  854. config = {}
  855. for graph_id, _ in inserted_nodes.items():
  856. config[graph_id] = tuple((np.inf, self.__ged_env.get_node_label(1)))
  857. # Run block gradient descent.
  858. converged = False
  859. itr = 0
  860. while not self.__insertion_termination_criterion_met(converged, itr):
  861. converged = not self.__update_config_(node_label, inserted_nodes, config, node_labels)
  862. converged = converged and (not self.__update_node_label(node_labels, node_label))
  863. itr += 1
  864. # Compute insertion delta of converged solution.
  865. delta = 0.0
  866. for _, node in config.items():
  867. if node[0] == np.inf:
  868. delta += self.__node_del_cost
  869. else:
  870. delta += self.__ged_env.node_rel_cost(node_label, node[1]) - self.__node_ins_cost
  871. # Update best delta and global configuration if improvement has been found.
  872. if delta < best_delta - self.__epsilon:
  873. best_delta = delta
  874. best_label = node_label # @todo: may be wrong.
  875. best_config.clear()
  876. for graph_id, k in config.items():
  877. best_config[graph_id] = k
  878. # Return the best delta.
  879. return best_delta
  880. def __compute_initial_node_labels(self, node_labels, median_labels):
  881. median_labels.clear()
  882. if self.__use_real_randomness: # @todo: may not work if parallelized.
  883. rng = np.random.randint(size=1)
  884. urng = np.random.RandomState(seed=rng[0])
  885. else:
  886. urng = np.random.RandomState(seed=self.__seed)
  887. # Generate the initial node label medians.
  888. if self.__init_type_increase_order == 'K-MEANS++':
  889. # Use k-means++ heuristic to generate the initial node label medians.
  890. already_selected = [False] * len(node_labels)
  891. selected_label_id = urng.uniform(low=0, high=len(node_labels), size=1)[0]
  892. median_labels.append(node_labels[selected_label_id])
  893. already_selected[selected_label_id] = True
  894. while len(median_labels) > self.__num_inits_increase_order:
  895. weights = [np.inf] * len(node_labels)
  896. for label_id in range(0, len(node_labels)):
  897. if already_selected[label_id]:
  898. weights[label_id] = 0
  899. continue
  900. for label in median_labels:
  901. weights[label_id] = min(weights[label_id], self.__ged_env.node_rel_cost(label, node_labels[label_id]))
  902. selected_label_id = urng.choice(range(0, len(weights)), size=1, p=weights)
  903. median_labels.append(node_labels[selected_label_id])
  904. already_selected[selected_label_id] = True
  905. else:
  906. # Compute the initial node medians as the medians of randomly generated clusters of (roughly) equal size.
  907. # @todo: go through and test.
  908. shuffled_node_labels = [np.inf] * len(node_labels) #@todo: random?
  909. # @todo: std::shuffle(shuffled_node_labels.begin(), shuffled_node_labels.end(), urng);?
  910. cluster_size = len(node_labels) / self.__num_inits_increase_order
  911. pos = 0.0
  912. cluster = []
  913. while len(median_labels) < self.__num_inits_increase_order - 1:
  914. while pos < (len(median_labels) + 1) * cluster_size:
  915. cluster.append(shuffled_node_labels[pos])
  916. pos += 1
  917. median_labels.append(self.__get_median_node_label(cluster))
  918. cluster.clear()
  919. while pos < len(shuffled_node_labels):
  920. pos += 1
  921. cluster.append(shuffled_node_labels[pos])
  922. median_labels.append(self.__get_median_node_label(cluster))
  923. cluster.clear()
  924. # Run Lloyd's Algorithm.
  925. converged = False
  926. closest_median_ids = [np.inf] * len(node_labels)
  927. clusters = [[] for _ in len(median_labels)]
  928. itr = 1
  929. while not self.__insertion_termination_criterion_met(converged, itr):
  930. converged = not self.__update_clusters(node_labels, median_labels, closest_median_ids)
  931. if not converged:
  932. for cluster in clusters:
  933. cluster.clear()
  934. for label_id in range(0, len(node_labels)):
  935. cluster[closest_median_ids[label_id]].append(node_labels[label_id])
  936. for cluster_id in range(0, len(clusters)):
  937. self.__update_node_label(cluster[cluster_id], median_labels[cluster_id])
  938. itr += 1
  939. def __insertion_termination_criterion_met(self, converged, itr):
  940. return converged or (itr >= self.__max_itrs_increase_order if self.__max_itrs_increase_order > 0 else False)
  941. def __update_config_(self, node_label, inserted_nodes, config, node_labels):
  942. # Determine the best configuration.
  943. config_modified = False
  944. for graph_id, node_set in inserted_nodes.items():
  945. best_assignment = config[graph_id]
  946. best_cost = 0.0
  947. if best_assignment[0] == np.inf:
  948. best_cost = self.__node_del_cost
  949. else:
  950. bets_cost = self.__ged_env.node_rel_cost(node_label, best_assignment[1]) - self.__node_ins_cost
  951. for node in node_set:
  952. cost = self.__ged_env.node_rel_cost(node_label, node[1]) - self.__node_ins_cost
  953. if cost < best_cost - self.__epsilon:
  954. best_cost = cost
  955. best_assignment = node
  956. config_modified = True
  957. if self.__node_del_cost < best_cost - self.__epsilon:
  958. best_cost = self.__node_del_cost
  959. best_assignment[0] = np.inf # @todo: work?
  960. config_modified = True
  961. config[graph_id] = best_assignment
  962. # Collect the node labels contained in the best configuration.
  963. node_labels.clear()
  964. for key, val in config.items():
  965. if val[0] != np.inf:
  966. node_labels.append(val[1])
  967. # Return true if the configuration was modified.
  968. return config_modified
  969. def __update_node_label(self, node_labels, node_label):
  970. new_node_label = self.__get_median_node_label(node_labels)
  971. if self.__ged_env.node_rel_cost(new_node_label, node_label) > self.__epsilon:
  972. node_label = new_node_label # @todo: may be wrong
  973. return True
  974. return False
  975. def __update_clusters(self, node_labels, median_labels, closest_median_ids):
  976. # Determine the closest median for each node label.
  977. clusters_modified = False
  978. for label_id in range(0, len(node_labels)):
  979. closest_median_id = np.inf
  980. dist_to_closest_median = np.inf
  981. for median_id in range(0, len(median_labels)):
  982. dist_to_median = self.__ged_env.node_rel_cost(median_labels[median_id], node_labels[label_id])
  983. if dist_to_median < dist_to_closest_median - self.__epsilon:
  984. dist_to_closest_median = dist_to_median
  985. closest_median_id = median_id
  986. if closest_median_id != closest_median_ids[label_id]:
  987. closest_median_ids[label_id] = closest_median_id
  988. clusters_modified = True
  989. # Return true if the clusters were modified.
  990. return clusters_modified
  def __add_node_to_median(self, best_config, best_label, median):
      """
      Append a node to `median` and extend all node maps accordingly.

      Parameters
      ----------
      best_config : dict
          Maps each graph ID to the node the new median node is assigned to.
      best_label : dict
          Attributes of the new node.
      median : NetworkX graph object
          The median graph; modified in place.
      """
      # Update the median; the new node gets the next free integer ID.
      new_node_id = nx.number_of_nodes(median)
      median.add_node(new_node_id, **best_label)
      new_order = nx.number_of_nodes(median)

      # Update the node maps: copy the old assignments and add the one of the new node.
      node_maps = self.__node_maps_from_median
      for graph_id, old_map in node_maps.items():
          relation = []
          old_map.as_relation(relation)
          extended_map = NodeMap(new_order, old_map.num_target_nodes())
          for assignment in relation:
              extended_map.add_assignment(assignment[0], assignment[1])
          extended_map.add_assignment(new_order - 1, best_config[graph_id])
          node_maps[graph_id] = extended_map

      # Increase overall number of increases.
      self.__num_increase_order += 1
  1005. def __improve_sum_of_distances(self, timer):
  1006. pass
  1007. def __median_available(self):
  1008. return self.__median_id != np.inf
  1009. # def __get_node_image_from_map(self, node_map, node):
  1010. # """
  1011. # Return ID of the node mapping of `node` in `node_map`.
  1012. # Parameters
  1013. # ----------
  1014. # node_map : list[tuple(int, int)]
  1015. # List of node maps where the mapping node is found.
  1016. #
  1017. # node : int
  1018. # The mapping node of this node is returned
  1019. # Raises
  1020. # ------
  1021. # Exception
  1022. # If the node with ID `node` is not contained in the source nodes of the node map.
  1023. # Returns
  1024. # -------
  1025. # int
  1026. # ID of the mapping of `node`.
  1027. #
  1028. # Notes
  1029. # -----
  1030. # This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function.
  1031. # """
  1032. # if node < len(node_map):
  1033. # return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf
  1034. # else:
  1035. # raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.')
  1036. # return np.inf
  1037. def __are_graphs_equal(self, g1, g2):
  1038. """
  1039. Check if the two graphs are equal.
  1040. Parameters
  1041. ----------
  1042. g1 : NetworkX graph object
  1043. Graph 1 to be compared.
  1044. g2 : NetworkX graph object
  1045. Graph 2 to be compared.
  1046. Returns
  1047. -------
  1048. bool
  1049. True if the two graph are equal.
  1050. Notes
  1051. -----
  1052. This is not an identical check. Here the two graphs are equal if and only if their original_node_ids, nodes, all node labels, edges and all edge labels are equal. This function is specifically designed for class `MedianGraphEstimator` and should not be used elsewhere.
  1053. """
  1054. # check original node ids.
  1055. if not g1.graph['original_node_ids'] == g2.graph['original_node_ids']:
  1056. return False
  1057. # check nodes.
  1058. nlist1 = [n for n in g1.nodes(data=True)]
  1059. nlist2 = [n for n in g2.nodes(data=True)]
  1060. if not nlist1 == nlist2:
  1061. return False
  1062. # check edges.
  1063. elist1 = [n for n in g1.edges(data=True)]
  1064. elist2 = [n for n in g2.edges(data=True)]
  1065. if not elist1 == elist2:
  1066. return False
  1067. return True
  def compute_my_cost(g, h, node_map):
      """
      Unfinished placeholder for a cost computation.

      Iterates over ``g.nodes`` while adding zero to a local accumulator.
      `h` and `node_map` are unused and nothing is returned.
      """
      cost = 0.0
      for _ in g.nodes:
          cost = cost + 0
  def set_label_names(self, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None):
      """
      Set the names of the node/edge labels and attributes.

      Parameters
      ----------
      node_labels : list, optional
          Names of the symbolic node labels.
      edge_labels : list, optional
          Names of the symbolic edge labels.
      node_attrs : list, optional
          Names of the non-symbolic node attributes.
      edge_attrs : list, optional
          Names of the non-symbolic edge attributes.

      Notes
      -----
      Omitted arguments default to a fresh empty list per call. The former
      ``[]`` defaults were single list objects shared by every estimator that
      relied on them.
      """
      self.__label_names = {
          'node_labels': [] if node_labels is None else node_labels,
          'edge_labels': [] if edge_labels is None else edge_labels,
          'node_attrs': [] if node_attrs is None else node_attrs,
          'edge_attrs': [] if edge_attrs is None else edge_attrs,
      }
  1075. def __get_median_node_label(self, node_labels):
  1076. if len(self.__label_names['node_labels']) > 0:
  1077. return self.__get_median_label_symbolic(node_labels)
  1078. elif len(self.__label_names['node_attrs']) > 0:
  1079. return self.__get_median_label_nonsymbolic(node_labels)
  1080. else:
  1081. raise Exception('Node label names are not given.')
  1082. def __get_median_edge_label(self, edge_labels):
  1083. if len(self.__label_names['edge_labels']) > 0:
  1084. return self.__get_median_label_symbolic(edge_labels)
  1085. elif len(self.__label_names['edge_attrs']) > 0:
  1086. return self.__get_median_label_nonsymbolic(edge_labels)
  1087. else:
  1088. raise Exception('Edge label names are not given.')
  1089. def __get_median_label_symbolic(self, labels):
  1090. # Construct histogram.
  1091. hist = {}
  1092. for label in labels:
  1093. label = tuple([kv for kv in label.items()]) # @todo: this may be slow.
  1094. if label not in hist:
  1095. hist[label] = 1
  1096. else:
  1097. hist[label] += 1
  1098. # Return the label that appears most frequently.
  1099. best_count = 0
  1100. median_label = {}
  1101. for label, count in hist.items():
  1102. if count > best_count:
  1103. best_count = count
  1104. median_label = {kv[0]: kv[1] for kv in label}
  1105. return median_label
  1106. def __get_median_label_nonsymbolic(self, labels):
  1107. if len(labels) == 0:
  1108. return {} # @todo
  1109. else:
  1110. # Transform the labels into coordinates and compute mean label as initial solution.
  1111. labels_as_coords = []
  1112. sums = {}
  1113. for key, val in labels[0].items():
  1114. sums[key] = 0
  1115. for label in labels:
  1116. coords = {}
  1117. for key, val in label.items():
  1118. label_f = float(val)
  1119. sums[key] += label_f
  1120. coords[key] = label_f
  1121. labels_as_coords.append(coords)
  1122. median = {}
  1123. for key, val in sums.items():
  1124. median[key] = val / len(labels)
  1125. # Run main loop of Weiszfeld's Algorithm.
  1126. epsilon = 0.0001
  1127. delta = 1.0
  1128. num_itrs = 0
  1129. all_equal = False
  1130. while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)):
  1131. numerator = {}
  1132. for key, val in sums.items():
  1133. numerator[key] = 0
  1134. denominator = 0
  1135. for label_as_coord in labels_as_coords:
  1136. norm = 0
  1137. for key, val in label_as_coord.items():
  1138. norm += (val - median[key]) ** 2
  1139. norm = np.sqrt(norm)
  1140. if norm > 0:
  1141. for key, val in label_as_coord.items():
  1142. numerator[key] += val / norm
  1143. denominator += 1.0 / norm
  1144. if denominator == 0:
  1145. all_equal = True
  1146. else:
  1147. new_median = {}
  1148. delta = 0.0
  1149. for key, val in numerator.items():
  1150. this_median = val / denominator
  1151. new_median[key] = this_median
  1152. delta += np.abs(median[key] - this_median)
  1153. median = new_median
  1154. num_itrs += 1
  1155. # Transform the solution to strings and return it.
  1156. median_label = {}
  1157. for key, val in median.items():
  1158. median_label[key] = str(val)
  1159. return median_label
  1160. # def __get_median_edge_label_symbolic(self, edge_labels):
  1161. # pass
  1162. # def __get_median_edge_label_nonsymbolic(self, edge_labels):
  1163. # if len(edge_labels) == 0:
  1164. # return {}
  1165. # else:
  1166. # # Transform the labels into coordinates and compute mean label as initial solution.
  1167. # edge_labels_as_coords = []
  1168. # sums = {}
  1169. # for key, val in edge_labels[0].items():
  1170. # sums[key] = 0
  1171. # for edge_label in edge_labels:
  1172. # coords = {}
  1173. # for key, val in edge_label.items():
  1174. # label = float(val)
  1175. # sums[key] += label
  1176. # coords[key] = label
  1177. # edge_labels_as_coords.append(coords)
  1178. # median = {}
  1179. # for key, val in sums.items():
  1180. # median[key] = val / len(edge_labels)
  1181. #
  1182. # # Run main loop of Weiszfeld's Algorithm.
  1183. # epsilon = 0.0001
  1184. # delta = 1.0
  1185. # num_itrs = 0
  1186. # all_equal = False
  1187. # while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)):
  1188. # numerator = {}
  1189. # for key, val in sums.items():
  1190. # numerator[key] = 0
  1191. # denominator = 0
  1192. # for edge_label_as_coord in edge_labels_as_coords:
  1193. # norm = 0
  1194. # for key, val in edge_label_as_coord.items():
  1195. # norm += (val - median[key]) ** 2
  1196. # norm += np.sqrt(norm)
  1197. # if norm > 0:
  1198. # for key, val in edge_label_as_coord.items():
  1199. # numerator[key] += val / norm
  1200. # denominator += 1.0 / norm
  1201. # if denominator == 0:
  1202. # all_equal = True
  1203. # else:
  1204. # new_median = {}
  1205. # delta = 0.0
  1206. # for key, val in numerator.items():
  1207. # this_median = val / denominator
  1208. # new_median[key] = this_median
  1209. # delta += np.abs(median[key] - this_median)
  1210. # median = new_median
  1211. #
  1212. # num_itrs += 1
  1213. #
  1214. # # Transform the solution to ged::GXLLabel and return it.
  1215. # median_label = {}
  1216. # for key, val in median.items():
  1217. # median_label[key] = str(val)
  1218. # return median_label

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.