You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

preimage_iam.py 34 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Apr 30 17:07:43 2019
  5. A graph pre-image method combining iterative pre-image method in reference [1]
  6. and the iterative alternate minimizations (IAM) in reference [2].
  7. @author: ljia
  8. @references:
  9. [1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to find graph
  10. pre-images. In Joint Pattern Recognition Symposium, pages 253-261. Springer, 2004.
  11. [2] Generalized median graph via iterative alternate minimization.
  12. """
  13. import sys
  14. import numpy as np
  15. from tqdm import tqdm
  16. import networkx as nx
  17. import matplotlib.pyplot as plt
  18. import random
  19. from iam import iam_upgraded
  20. from utils import dis_gstar, compute_kernel
  21. def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
  22. gkernel, epsilon=0.001, InitIAMWithAllDk=False,
  23. params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
  24. 'ite_max': 50, 'epsilon': 0.001,
  25. 'removeNodes': True, 'connected': False},
  26. params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
  27. 'saveGXL': 'benoit'}):
  28. """This function constructs graph pre-image by the iterative pre-image
  29. framework in reference [1], algorithm 1, where the step of generating new
  30. graphs randomly is replaced by the IAM algorithm in reference [2].
  31. notes
  32. -----
  33. Every time a set of n better graphs is acquired, their distances in kernel space are
  34. compared with the k nearest ones, and the k nearest distances from the k+n
  35. distances will be used as the new ones.
  36. """
  37. # compute k nearest neighbors of phi in DN.
  38. dis_all = [] # distance between g_star and each graph.
  39. term3 = 0
  40. for i1, a1 in enumerate(alpha):
  41. for i2, a2 in enumerate(alpha):
  42. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  43. for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
  44. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
  45. dis_all.append(dtemp)
  46. # sort
  47. sort_idx = np.argsort(dis_all)
  48. dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
  49. nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist())
  50. ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  51. if dis_k[0] == 0: # the exact pre-image.
  52. print('The exact pre-image is found from the input dataset.')
  53. return 0, ghat_list, 0, 0
  54. dhat = dis_k[0] # the nearest distance
  55. # for g in ghat_list:
  56. # draw_Letter_graph(g)
  57. # nx.draw_networkx(g)
  58. # plt.show()
  59. # print(g.nodes(data=True))
  60. # print(g.edges(data=True))
  61. Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  62. # for gi in Gk:
  63. # nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
  64. ## nx.draw_networkx(gi)
  65. # plt.show()
  66. ## draw_Letter_graph(g)
  67. # print(gi.nodes(data=True))
  68. # print(gi.edges(data=True))
  69. # i = 1
  70. r = 0
  71. itr_total = 0
  72. dis_of_each_itr = [dhat]
  73. found = False
  74. nb_updated = 0
  75. nb_updated_k = 0
  76. while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
  77. print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
  78. print('Current preimage iteration =', r)
  79. print('Total preimage iteration =', itr_total, '\n')
  80. found = False
  81. Gn_nearest_median = [g.copy() for g in Gk]
  82. if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
  83. ghat_new_list = []
  84. for g_tmp in Gk:
  85. Gn_nearest_init = [g_tmp.copy()]
  86. ghat_new_list_tmp, _ = iam_upgraded(Gn_nearest_median,
  87. Gn_nearest_init, params_ged=params_ged, **params_iam)
  88. ghat_new_list += ghat_new_list_tmp
  89. else: # only the best graph in D_k is used to initialize IAM.
  90. Gn_nearest_init = [g.copy() for g in Gk]
  91. ghat_new_list, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
  92. params_ged=params_ged, **params_iam)
  93. # for g in g_tmp_list:
  94. # nx.draw_networkx(g)
  95. # plt.show()
  96. # draw_Letter_graph(g)
  97. # print(g.nodes(data=True))
  98. # print(g.edges(data=True))
  99. # compute distance between \psi and the new generated graphs.
  100. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
  101. dhat_new_list = []
  102. for idx, g_tmp in enumerate(ghat_new_list):
  103. # @todo: the term3 below could use the one at the beginning of the function.
  104. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
  105. len(ghat_new_list) + len(Gn_median) + 1),
  106. alpha, knew, withterm3=False))
  107. for idx_g, ghat_new in enumerate(ghat_new_list):
  108. dhat_new = dhat_new_list[idx_g]
  109. # if the new distance is smaller than the max of D_k.
  110. if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
  111. # check if the new distance is the same as one in D_k.
  112. is_duplicate = False
  113. for dis_tmp in dis_k[1:-1]:
  114. if np.abs(dhat_new - dis_tmp) < epsilon:
  115. is_duplicate = True
  116. print('IAM: duplicate k nearest graph generated.')
  117. break
  118. if not is_duplicate:
  119. if np.abs(dhat_new - dhat) < epsilon:
  120. print('IAM: I am equal!')
  121. # dhat = dhat_new
  122. # ghat_list = [ghat_new.copy()]
  123. else:
  124. print('IAM: we got better k nearest neighbors!')
  125. nb_updated_k += 1
  126. print('the k nearest neighbors are updated',
  127. nb_updated_k, 'times.')
  128. dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
  129. Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
  130. sort_idx = np.argsort(dis_k)
  131. dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  132. Gk = [Gk[idx] for idx in sort_idx[0:k]]
  133. if dhat_new < dhat:
  134. print('IAM: I have smaller distance!')
  135. print(str(dhat) + '->' + str(dhat_new))
  136. dhat = dhat_new
  137. ghat_list = [Gk[0].copy()]
  138. r = 0
  139. nb_updated += 1
  140. print('the graph is updated', nb_updated, 'times.')
  141. nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'),
  142. with_labels=True)
  143. ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
  144. plt.show()
  145. found = True
  146. if not found:
  147. r += 1
  148. dis_of_each_itr.append(dhat)
  149. itr_total += 1
  150. print('\nthe k shortest distances are', dis_k)
  151. print('the shortest distances for previous iterations are', dis_of_each_itr)
  152. print('\n\nthe graph is updated', nb_updated, 'times.')
  153. print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
  154. print('distances in kernel space:', dis_of_each_itr, '\n')
  155. return dhat, ghat_list, dis_of_each_itr[-1], nb_updated, nb_updated_k
  156. def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
  157. l_max, gkernel, epsilon=0.001,
  158. InitIAMWithAllDk=False, InitRandomWithAllDk=True,
  159. params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
  160. 'ite_max': 50, 'epsilon': 0.001,
  161. 'removeNodes': True, 'connected': False},
  162. params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
  163. 'saveGXL': 'benoit'}):
  164. """This function constructs graph pre-image by the iterative pre-image
  165. framework in reference [1], algorithm 1, where new graphs are generated
  166. randomly and by the IAM algorithm in reference [2].
  167. notes
  168. -----
  169. Every time a set of n better graphs is acquired, their distances in kernel space are
  170. compared with the k nearest ones, and the k nearest distances from the k+n
  171. distances will be used as the new ones.
  172. """
  173. Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
  174. # compute k nearest neighbors of phi in DN.
  175. dis_all = [] # distance between g_star and each graph.
  176. term3 = 0
  177. for i1, a1 in enumerate(alpha):
  178. for i2, a2 in enumerate(alpha):
  179. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  180. for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
  181. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
  182. dis_all.append(dtemp)
  183. # sort
  184. sort_idx = np.argsort(dis_all)
  185. dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
  186. nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist())
  187. ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of psi in DN
  188. if dis_k[0] == 0: # the exact pre-image.
  189. print('The exact pre-image is found from the input dataset.')
  190. return 0, ghat_list, 0, 0
  191. dhat = dis_k[0] # the nearest distance
  192. # for g in ghat_list:
  193. # draw_Letter_graph(g)
  194. # nx.draw_networkx(g)
  195. # plt.show()
  196. # print(g.nodes(data=True))
  197. # print(g.edges(data=True))
  198. Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  199. # for gi in Gk:
  200. # nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
  201. ## nx.draw_networkx(gi)
  202. # plt.show()
  203. ## draw_Letter_graph(g)
  204. # print(gi.nodes(data=True))
  205. # print(gi.edges(data=True))
  206. r = 0
  207. itr_total = 0
  208. dis_of_each_itr = [dhat]
  209. nb_updated_iam = 0
  210. nb_updated_k_iam = 0
  211. nb_updated_random = 0
  212. nb_updated_k_random = 0
  213. # is_iam_duplicate = False
  214. while r < r_max: # and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
  215. print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
  216. print('Current preimage iteration =', r)
  217. print('Total preimage iteration =', itr_total, '\n')
  218. found_iam = False
  219. Gn_nearest_median = [g.copy() for g in Gk]
  220. if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
  221. ghat_new_list = []
  222. for g_tmp in Gk:
  223. Gn_nearest_init = [g_tmp.copy()]
  224. ghat_new_list_tmp, _ = iam_upgraded(Gn_nearest_median,
  225. Gn_nearest_init, params_ged=params_ged, **params_iam)
  226. ghat_new_list += ghat_new_list_tmp
  227. else: # only the best graph in D_k is used to initialize IAM.
  228. Gn_nearest_init = [g.copy() for g in Gk]
  229. ghat_new_list, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
  230. params_ged=params_ged, **params_iam)
  231. # for g in g_tmp_list:
  232. # nx.draw_networkx(g)
  233. # plt.show()
  234. # draw_Letter_graph(g)
  235. # print(g.nodes(data=True))
  236. # print(g.edges(data=True))
  237. # compute distance between \psi and the new generated graphs.
  238. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
  239. dhat_new_list = []
  240. for idx, g_tmp in enumerate(ghat_new_list):
  241. # @todo: the term3 below could use the one at the beginning of the function.
  242. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
  243. len(ghat_new_list) + len(Gn_median) + 1),
  244. alpha, knew, withterm3=False))
  245. # find the new k nearest graphs.
  246. for idx_g, ghat_new in enumerate(ghat_new_list):
  247. dhat_new = dhat_new_list[idx_g]
  248. # if the new distance is smaller than the max of D_k.
  249. if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
  250. # check if the new distance is the same as one in D_k.
  251. is_duplicate = False
  252. for dis_tmp in dis_k[1:-1]:
  253. if np.abs(dhat_new - dis_tmp) < epsilon:
  254. is_duplicate = True
  255. print('IAM: duplicate k nearest graph generated.')
  256. break
  257. if not is_duplicate:
  258. if np.abs(dhat_new - dhat) < epsilon:
  259. print('IAM: I am equal!')
  260. # dhat = dhat_new
  261. # ghat_list = [ghat_new.copy()]
  262. else:
  263. print('IAM: we got better k nearest neighbors!')
  264. nb_updated_k_iam += 1
  265. print('the k nearest neighbors are updated',
  266. nb_updated_k_iam, 'times.')
  267. dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
  268. Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
  269. sort_idx = np.argsort(dis_k)
  270. dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  271. Gk = [Gk[idx] for idx in sort_idx[0:k]]
  272. if dhat_new < dhat:
  273. print('IAM: I have smaller distance!')
  274. print(str(dhat) + '->' + str(dhat_new))
  275. dhat = dhat_new
  276. ghat_list = [Gk[0].copy()]
  277. r = 0
  278. nb_updated_iam += 1
  279. print('the graph is updated by IAM', nb_updated_iam,
  280. 'times.')
  281. nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'),
  282. with_labels=True)
  283. ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
  284. plt.show()
  285. found_iam = True
  286. # when new distance is not smaller than the max of D_k, use random generation.
  287. if not found_iam:
  288. print('Distance not better, switching to random generation now.')
  289. print(str(dhat) + '->' + str(dhat_new))
  290. if InitRandomWithAllDk: # use all k nearest graphs as the initials.
  291. init_list = [g_init.copy() for g_init in Gk]
  292. else: # use just the nearest graph as the initial.
  293. init_list = [Gk[0].copy()]
  294. # number of edges to be changed.
  295. if len(init_list) == 1:
  296. # @todo what if the log is negetive? how to choose alpha (scalar)? seems fdgs is always 1.
  297. # fdgs = dhat_new
  298. fdgs = nb_updated_random + 1
  299. if fdgs < 1:
  300. fdgs = 1
  301. fdgs = int(np.ceil(np.log(fdgs)))
  302. if fdgs < 1:
  303. fdgs += 1
  304. # fdgs = nb_updated_random + 1 # @todo:
  305. fdgs_list = [fdgs]
  306. else:
  307. # @todo what if the log is negetive? how to choose alpha (scalar)?
  308. fdgs_list = np.array(dis_k[:])
  309. if np.min(fdgs_list) < 1:
  310. fdgs_list /= dis_k[0]
  311. fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
  312. if np.min(fdgs_list) < 1:
  313. fdgs_list = np.array(fdgs_list) + 1
  314. l = 0
  315. found_random = False
  316. while l < l_max and not found_random:
  317. for idx_g, g_tmp in enumerate(init_list):
  318. # add and delete edges.
  319. ghat_new = nx.convert_node_labels_to_integers(g_tmp.copy())
  320. # @todo: should we use just half of the adjacency matrix for undirected graphs?
  321. nb_vpairs = nx.number_of_nodes(ghat_new) * (nx.number_of_nodes(ghat_new) - 1)
  322. np.random.seed()
  323. # which edges to change.
  324. # @todo: what if fdgs is bigger than nb_vpairs?
  325. idx_change = random.sample(range(nb_vpairs), fdgs_list[idx_g] if
  326. fdgs_list[idx_g] < nb_vpairs else nb_vpairs)
  327. # idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
  328. # (nx.number_of_nodes(gs) - 1), fdgs)
  329. for item in idx_change:
  330. node1 = int(item / (nx.number_of_nodes(ghat_new) - 1))
  331. node2 = (item - node1 * (nx.number_of_nodes(ghat_new) - 1))
  332. if node2 >= node1: # skip the self pair.
  333. node2 += 1
  334. # @todo: is the randomness correct?
  335. if not ghat_new.has_edge(node1, node2):
  336. ghat_new.add_edge(node1, node2)
  337. # nx.draw_networkx(gs)
  338. # plt.show()
  339. # nx.draw_networkx(ghat_new)
  340. # plt.show()
  341. else:
  342. ghat_new.remove_edge(node1, node2)
  343. # nx.draw_networkx(gs)
  344. # plt.show()
  345. # nx.draw_networkx(ghat_new)
  346. # plt.show()
  347. # nx.draw_networkx(ghat_new)
  348. # plt.show()
  349. # compute distance between \psi and the new generated graph.
  350. knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False)
  351. dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1),
  352. alpha, knew, withterm3=False)
  353. # @todo: the new distance is smaller or also equal?
  354. if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
  355. # check if the new distance is the same as one in D_k.
  356. is_duplicate = False
  357. for dis_tmp in dis_k[1:-1]:
  358. if np.abs(dhat_new - dis_tmp) < epsilon:
  359. is_duplicate = True
  360. print('Random: duplicate k nearest graph generated.')
  361. break
  362. if not is_duplicate:
  363. if np.abs(dhat_new - dhat) < epsilon:
  364. print('Random: I am equal!')
  365. # dhat = dhat_new
  366. # ghat_list = [ghat_new.copy()]
  367. else:
  368. print('Random: we got better k nearest neighbors!')
  369. print('l =', str(l))
  370. nb_updated_k_random += 1
  371. print('the k nearest neighbors are updated by random generation',
  372. nb_updated_k_random, 'times.')
  373. dis_k = [dhat_new] + dis_k # add the new nearest distances.
  374. Gk = [ghat_new.copy()] + Gk # add the corresponding graphs.
  375. sort_idx = np.argsort(dis_k)
  376. dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  377. Gk = [Gk[idx] for idx in sort_idx[0:k]]
  378. if dhat_new < dhat:
  379. print('\nRandom: I am smaller!')
  380. print('l =', str(l))
  381. print(dhat, '->', dhat_new)
  382. dhat = dhat_new
  383. ghat_list = [ghat_new.copy()]
  384. r = 0
  385. nb_updated_random += 1
  386. print('the graph is updated by random generation',
  387. nb_updated_random, 'times.')
  388. nx.draw(ghat_new, labels=nx.get_node_attributes(ghat_new, 'atom'),
  389. with_labels=True)
  390. ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
  391. plt.show()
  392. found_random = True
  393. break
  394. l += 1
  395. if not found_random: # l == l_max:
  396. r += 1
  397. dis_of_each_itr.append(dhat)
  398. itr_total += 1
  399. print('\nthe k shortest distances are', dis_k)
  400. print('the shortest distances for previous iterations are', dis_of_each_itr)
  401. print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
  402. nb_updated_random, 'times.')
  403. print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam,
  404. 'times, and by random generation', nb_updated_k_random, 'times.')
  405. print('distances in kernel space:', dis_of_each_itr, '\n')
  406. return dhat, ghat_list, dis_of_each_itr[-1], \
  407. nb_updated_iam, nb_updated_random, nb_updated_k_iam, nb_updated_k_random
  408. ###############################################################################
  409. # Old implementations.
  410. #def gk_iam(Gn, alpha):
  411. # """This function constructs graph pre-image by the iterative pre-image
  412. # framework in reference [1], algorithm 1, where the step of generating new
  413. # graphs randomly is replaced by the IAM algorithm in reference [2].
  414. #
  415. # notes
  416. # -----
  417. # Every time a better graph is acquired, the older one is replaced by it.
  418. # """
  419. # pass
  420. # # compute k nearest neighbors of phi in DN.
  421. # dis_list = [] # distance between g_star and each graph.
  422. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  423. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  424. # k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
  425. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  426. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  427. # dis_list.append(dtemp)
  428. #
  429. # # sort
  430. # sort_idx = np.argsort(dis_list)
  431. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
  432. # g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  433. # if dis_gs[0] == 0: # the exact pre-image.
  434. # print('The exact pre-image is found from the input dataset.')
  435. # return 0, g0hat
  436. # dhat = dis_gs[0] # the nearest distance
  437. # Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
  438. # gihat_list = []
  439. #
  440. ## i = 1
  441. # r = 1
  442. # while r < r_max:
  443. # print('r =', r)
  444. ## found = False
  445. # Gs_nearest = Gk + gihat_list
  446. # g_tmp = iam(Gs_nearest)
  447. #
  448. # # compute distance between \psi and the new generated graph.
  449. # knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
  450. # p_quit=lmbda, n_iteration=20, remove_totters=False,
  451. # n_jobs=multiprocessing.cpu_count(), verbose=False)
  452. # dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
  453. # knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
  454. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  455. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  456. # if dnew <= dhat: # the new distance is smaller
  457. # print('I am smaller!')
  458. # dhat = dnew
  459. # g_new = g_tmp.copy() # found better graph.
  460. # gihat_list = [g_new]
  461. # dis_gs.append(dhat)
  462. # r = 0
  463. # else:
  464. # r += 1
  465. #
  466. # ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
  467. #
  468. # return dhat, ghat
  469. #def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  470. # """This function constructs graph pre-image by the iterative pre-image
  471. # framework in reference [1], algorithm 1, where the step of generating new
  472. # graphs randomly is replaced by the IAM algorithm in reference [2].
  473. #
  474. # notes
  475. # -----
  476. # Every time a better graph is acquired, its distance in kernel space is
  477. # compared with the k nearest ones, and the k nearest distances from the k+1
  478. # distances will be used as the new ones.
  479. # """
  480. # # compute k nearest neighbors of phi in DN.
  481. # dis_list = [] # distance between g_star and each graph.
  482. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  483. # dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  484. ## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  485. ## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  486. ## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  487. ## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  488. # dis_list.append(dtemp)
  489. #
  490. # # sort
  491. # sort_idx = np.argsort(dis_list)
  492. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  493. # g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  494. # if dis_gs[0] == 0: # the exact pre-image.
  495. # print('The exact pre-image is found from the input dataset.')
  496. # return 0, g0hat
  497. # dhat = dis_gs[0] # the nearest distance
  498. # ghat = g0hat.copy()
  499. # Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  500. # for gi in Gk:
  501. # nx.draw_networkx(gi)
  502. # plt.show()
  503. # print(gi.nodes(data=True))
  504. # print(gi.edges(data=True))
  505. # Gs_nearest = Gk.copy()
  506. ## gihat_list = []
  507. #
  508. ## i = 1
  509. # r = 1
  510. # while r < r_max:
  511. # print('r =', r)
  512. ## found = False
  513. ## Gs_nearest = Gk + gihat_list
  514. ## g_tmp = iam(Gs_nearest)
  515. # g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  516. # nx.draw_networkx(g_tmp)
  517. # plt.show()
  518. # print(g_tmp.nodes(data=True))
  519. # print(g_tmp.edges(data=True))
  520. #
  521. # # compute distance between \psi and the new generated graph.
  522. # gi_list = [Gn[i] for i in idx_gi]
  523. # knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
  524. # dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
  525. #
  526. ## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  527. ## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  528. ## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  529. ## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  530. # if dnew <= dhat and g_tmp != ghat: # the new distance is smaller
  531. # print('I am smaller!')
  532. # print(str(dhat) + '->' + str(dnew))
  533. ## nx.draw_networkx(ghat)
  534. ## plt.show()
  535. ## print('->')
  536. ## nx.draw_networkx(g_tmp)
  537. ## plt.show()
  538. #
  539. # dhat = dnew
  540. # g_new = g_tmp.copy() # found better graph.
  541. # ghat = g_tmp.copy()
  542. # dis_gs.append(dhat) # add the new nearest distance.
  543. # Gs_nearest.append(g_new) # add the corresponding graph.
  544. # sort_idx = np.argsort(dis_gs)
  545. # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  546. # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  547. # r = 0
  548. # else:
  549. # r += 1
  550. #
  551. # return dhat, ghat
  552. #def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  553. # """This function constructs graph pre-image by the iterative pre-image
  554. # framework in reference [1], algorithm 1, where the step of generating new
  555. # graphs randomly is replaced by the IAM algorithm in reference [2].
  556. #
  557. # notes
  558. # -----
  559. # Every time a set of n better graphs is acquired, their distances in kernel space are
  560. # compared with the k nearest ones, and the k nearest distances from the k+n
  561. # distances will be used as the new ones.
  562. # """
  563. # Gn_median = [Gn[idx].copy() for idx in idx_gi]
  564. # # compute k nearest neighbors of phi in DN.
  565. # dis_list = [] # distance between g_star and each graph.
  566. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  567. # dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  568. ## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  569. ## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  570. ## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  571. ## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  572. # dis_list.append(dtemp)
  573. #
  574. # # sort
  575. # sort_idx = np.argsort(dis_list)
  576. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  577. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  578. # g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  579. # if dis_gs[0] == 0: # the exact pre-image.
  580. # print('The exact pre-image is found from the input dataset.')
  581. # return 0, g0hat_list
  582. # dhat = dis_gs[0] # the nearest distance
  583. # ghat_list = [g.copy() for g in g0hat_list]
  584. # for g in ghat_list:
  585. # nx.draw_networkx(g)
  586. # plt.show()
  587. # print(g.nodes(data=True))
  588. # print(g.edges(data=True))
  589. # Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  590. # for gi in Gk:
  591. # nx.draw_networkx(gi)
  592. # plt.show()
  593. # print(gi.nodes(data=True))
  594. # print(gi.edges(data=True))
  595. # Gs_nearest = Gk.copy()
  596. ## gihat_list = []
  597. #
  598. ## i = 1
  599. # r = 1
  600. # while r < r_max:
  601. # print('r =', r)
  602. ## found = False
  603. ## Gs_nearest = Gk + gihat_list
  604. ## g_tmp = iam(Gs_nearest)
  605. # g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
  606. # Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  607. # for g in g_tmp_list:
  608. # nx.draw_networkx(g)
  609. # plt.show()
  610. # print(g.nodes(data=True))
  611. # print(g.edges(data=True))
  612. #
  613. # # compute distance between \psi and the new generated graphs.
  614. # gi_list = [Gn[i] for i in idx_gi]
  615. # knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
  616. # dnew_list = []
  617. # for idx, g_tmp in enumerate(g_tmp_list):
  618. # dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
  619. # len(g_tmp_list) + len(gi_list) + 1), alpha, knew))
  620. #
  621. ## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  622. ## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  623. ## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  624. ## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  625. #
  626. # # find the new k nearest graphs.
  627. # dis_gs = dnew_list + dis_gs # add the new nearest distances.
  628. # Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
  629. # sort_idx = np.argsort(dis_gs)
  630. # if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
  631. # print('We got better k nearest neighbors! Hurray!')
  632. # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  633. # print(dis_gs[-1])
  634. # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  635. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  636. # if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
  637. # print('I have smaller or equal distance!')
  638. # dhat = dis_gs[0]
  639. # print(str(dhat) + '->' + str(dhat))
  640. # idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
  641. # ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
  642. # for g in ghat_list:
  643. # nx.draw_networkx(g)
  644. # plt.show()
  645. # print(g.nodes(data=True))
  646. # print(g.edges(data=True))
  647. # r = 0
  648. # else:
  649. # r += 1
  650. #
  651. # return dhat, ghat_list

A Python package for graph kernels, graph edit distances and graph pre-image problem.