You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

preimage_iam.py 34 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Apr 30 17:07:43 2019
  5. A graph pre-image method combining iterative pre-image method in reference [1]
  6. and the iterative alternate minimizations (IAM) in reference [2].
  7. @author: ljia
  8. @references:
  9. [1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to find graph
  10. pre-images. In Joint Pattern Recognition Symposium, pages 253-261. Springer, 2004.
  11. [2] Generalized median graph via iterative alternate minimization.
  12. """
  13. import sys
  14. import numpy as np
  15. from tqdm import tqdm
  16. import networkx as nx
  17. import matplotlib.pyplot as plt
  18. import random
  19. from iam import iam_upgraded
  20. from utils import dis_gstar, compute_kernel
  21. def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
  22. gkernel, epsilon=0.001, InitIAMWithAllDk=False,
  23. params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
  24. 'ite_max': 50, 'epsilon': 0.001,
  25. 'removeNodes': True, 'connected': False},
  26. params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP',
  27. 'edit_cost_constant': [], 'stabilizer': 'min',
  28. 'repeat': 50}):
  29. """This function constructs graph pre-image by the iterative pre-image
  30. framework in reference [1], algorithm 1, where the step of generating new
  31. graphs randomly is replaced by the IAM algorithm in reference [2].
  32. notes
  33. -----
  34. Every time a set of n better graphs is acquired, their distances in kernel space are
  35. compared with the k nearest ones, and the k nearest distances from the k+n
  36. distances will be used as the new ones.
  37. """
  38. # compute k nearest neighbors of phi in DN.
  39. dis_all = [] # distance between g_star and each graph.
  40. term3 = 0
  41. for i1, a1 in enumerate(alpha):
  42. for i2, a2 in enumerate(alpha):
  43. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  44. for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
  45. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
  46. dis_all.append(dtemp)
  47. # sort
  48. sort_idx = np.argsort(dis_all)
  49. dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
  50. nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist())
  51. ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  52. if dis_k[0] == 0: # the exact pre-image.
  53. print('The exact pre-image is found from the input dataset.')
  54. return 0, ghat_list, 0, 0
  55. dhat = dis_k[0] # the nearest distance
  56. # for g in ghat_list:
  57. # draw_Letter_graph(g)
  58. # nx.draw_networkx(g)
  59. # plt.show()
  60. # print(g.nodes(data=True))
  61. # print(g.edges(data=True))
  62. Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  63. # for gi in Gk:
  64. # nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
  65. ## nx.draw_networkx(gi)
  66. # plt.show()
  67. ## draw_Letter_graph(g)
  68. # print(gi.nodes(data=True))
  69. # print(gi.edges(data=True))
  70. # i = 1
  71. r = 0
  72. itr_total = 0
  73. dis_of_each_itr = [dhat]
  74. found = False
  75. nb_updated = 0
  76. nb_updated_k = 0
  77. while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
  78. print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
  79. print('Current preimage iteration =', r)
  80. print('Total preimage iteration =', itr_total, '\n')
  81. found = False
  82. Gn_nearest_median = [g.copy() for g in Gk]
  83. if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
  84. ghat_new_list = []
  85. for g_tmp in Gk:
  86. Gn_nearest_init = [g_tmp.copy()]
  87. ghat_new_list_tmp, _, _ = iam_upgraded(Gn_nearest_median,
  88. Gn_nearest_init, params_ged=params_ged, **params_iam)
  89. ghat_new_list += ghat_new_list_tmp
  90. else: # only the best graph in D_k is used to initialize IAM.
  91. Gn_nearest_init = [g.copy() for g in Gk]
  92. ghat_new_list, _, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
  93. params_ged=params_ged, **params_iam)
  94. # for g in g_tmp_list:
  95. # nx.draw_networkx(g)
  96. # plt.show()
  97. # draw_Letter_graph(g)
  98. # print(g.nodes(data=True))
  99. # print(g.edges(data=True))
  100. # compute distance between \psi and the new generated graphs.
  101. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
  102. dhat_new_list = []
  103. for idx, g_tmp in enumerate(ghat_new_list):
  104. # @todo: the term3 below could use the one at the beginning of the function.
  105. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
  106. len(ghat_new_list) + len(Gn_median) + 1),
  107. alpha, knew, withterm3=False))
  108. for idx_g, ghat_new in enumerate(ghat_new_list):
  109. dhat_new = dhat_new_list[idx_g]
  110. # if the new distance is smaller than the max of D_k.
  111. if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
  112. # check if the new distance is the same as one in D_k.
  113. is_duplicate = False
  114. for dis_tmp in dis_k[1:-1]:
  115. if np.abs(dhat_new - dis_tmp) < epsilon:
  116. is_duplicate = True
  117. print('IAM: duplicate k nearest graph generated.')
  118. break
  119. if not is_duplicate:
  120. if np.abs(dhat_new - dhat) < epsilon:
  121. print('IAM: I am equal!')
  122. # dhat = dhat_new
  123. # ghat_list = [ghat_new.copy()]
  124. else:
  125. print('IAM: we got better k nearest neighbors!')
  126. nb_updated_k += 1
  127. print('the k nearest neighbors are updated',
  128. nb_updated_k, 'times.')
  129. dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
  130. Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
  131. sort_idx = np.argsort(dis_k)
  132. dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  133. Gk = [Gk[idx] for idx in sort_idx[0:k]]
  134. if dhat_new < dhat:
  135. print('IAM: I have smaller distance!')
  136. print(str(dhat) + '->' + str(dhat_new))
  137. dhat = dhat_new
  138. ghat_list = [Gk[0].copy()]
  139. r = 0
  140. nb_updated += 1
  141. print('the graph is updated', nb_updated, 'times.')
  142. nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'),
  143. with_labels=True)
  144. ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
  145. plt.show()
  146. found = True
  147. if not found:
  148. r += 1
  149. dis_of_each_itr.append(dhat)
  150. itr_total += 1
  151. print('\nthe k shortest distances are', dis_k)
  152. print('the shortest distances for previous iterations are', dis_of_each_itr)
  153. print('\n\nthe graph is updated', nb_updated, 'times.')
  154. print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
  155. print('distances in kernel space:', dis_of_each_itr, '\n')
  156. return dhat, ghat_list, dis_of_each_itr[-1], nb_updated, nb_updated_k
  157. def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
  158. l_max, gkernel, epsilon=0.001,
  159. InitIAMWithAllDk=False, InitRandomWithAllDk=True,
  160. params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
  161. 'ite_max': 50, 'epsilon': 0.001,
  162. 'removeNodes': True, 'connected': False},
  163. params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1',
  164. 'method': 'IPFP', 'edit_cost_constant': [],
  165. 'stabilizer': 'min', 'repeat': 50}):
  166. """This function constructs graph pre-image by the iterative pre-image
  167. framework in reference [1], algorithm 1, where new graphs are generated
  168. randomly and by the IAM algorithm in reference [2].
  169. notes
  170. -----
  171. Every time a set of n better graphs is acquired, their distances in kernel space are
  172. compared with the k nearest ones, and the k nearest distances from the k+n
  173. distances will be used as the new ones.
  174. """
  175. Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
  176. # compute k nearest neighbors of phi in DN.
  177. dis_all = [] # distance between g_star and each graph.
  178. term3 = 0
  179. for i1, a1 in enumerate(alpha):
  180. for i2, a2 in enumerate(alpha):
  181. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  182. for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
  183. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
  184. dis_all.append(dtemp)
  185. # sort
  186. sort_idx = np.argsort(dis_all)
  187. dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
  188. nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist())
  189. ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of psi in DN
  190. if dis_k[0] == 0: # the exact pre-image.
  191. print('The exact pre-image is found from the input dataset.')
  192. return 0, ghat_list, 0, 0
  193. dhat = dis_k[0] # the nearest distance
  194. # for g in ghat_list:
  195. # draw_Letter_graph(g)
  196. # nx.draw_networkx(g)
  197. # plt.show()
  198. # print(g.nodes(data=True))
  199. # print(g.edges(data=True))
  200. Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  201. # for gi in Gk:
  202. # nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
  203. ## nx.draw_networkx(gi)
  204. # plt.show()
  205. ## draw_Letter_graph(g)
  206. # print(gi.nodes(data=True))
  207. # print(gi.edges(data=True))
  208. r = 0
  209. itr_total = 0
  210. dis_of_each_itr = [dhat]
  211. nb_updated_iam = 0
  212. nb_updated_k_iam = 0
  213. nb_updated_random = 0
  214. nb_updated_k_random = 0
  215. # is_iam_duplicate = False
  216. while r < r_max: # and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
  217. print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
  218. print('Current preimage iteration =', r)
  219. print('Total preimage iteration =', itr_total, '\n')
  220. found_iam = False
  221. Gn_nearest_median = [g.copy() for g in Gk]
  222. if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
  223. ghat_new_list = []
  224. for g_tmp in Gk:
  225. Gn_nearest_init = [g_tmp.copy()]
  226. ghat_new_list_tmp, _ = iam_upgraded(Gn_nearest_median,
  227. Gn_nearest_init, params_ged=params_ged, **params_iam)
  228. ghat_new_list += ghat_new_list_tmp
  229. else: # only the best graph in D_k is used to initialize IAM.
  230. Gn_nearest_init = [g.copy() for g in Gk]
  231. ghat_new_list, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init,
  232. params_ged=params_ged, **params_iam)
  233. # for g in g_tmp_list:
  234. # nx.draw_networkx(g)
  235. # plt.show()
  236. # draw_Letter_graph(g)
  237. # print(g.nodes(data=True))
  238. # print(g.edges(data=True))
  239. # compute distance between \psi and the new generated graphs.
  240. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
  241. dhat_new_list = []
  242. for idx, g_tmp in enumerate(ghat_new_list):
  243. # @todo: the term3 below could use the one at the beginning of the function.
  244. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
  245. len(ghat_new_list) + len(Gn_median) + 1),
  246. alpha, knew, withterm3=False))
  247. # find the new k nearest graphs.
  248. for idx_g, ghat_new in enumerate(ghat_new_list):
  249. dhat_new = dhat_new_list[idx_g]
  250. # if the new distance is smaller than the max of D_k.
  251. if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
  252. # check if the new distance is the same as one in D_k.
  253. is_duplicate = False
  254. for dis_tmp in dis_k[1:-1]:
  255. if np.abs(dhat_new - dis_tmp) < epsilon:
  256. is_duplicate = True
  257. print('IAM: duplicate k nearest graph generated.')
  258. break
  259. if not is_duplicate:
  260. if np.abs(dhat_new - dhat) < epsilon:
  261. print('IAM: I am equal!')
  262. # dhat = dhat_new
  263. # ghat_list = [ghat_new.copy()]
  264. else:
  265. print('IAM: we got better k nearest neighbors!')
  266. nb_updated_k_iam += 1
  267. print('the k nearest neighbors are updated',
  268. nb_updated_k_iam, 'times.')
  269. dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
  270. Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
  271. sort_idx = np.argsort(dis_k)
  272. dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  273. Gk = [Gk[idx] for idx in sort_idx[0:k]]
  274. if dhat_new < dhat:
  275. print('IAM: I have smaller distance!')
  276. print(str(dhat) + '->' + str(dhat_new))
  277. dhat = dhat_new
  278. ghat_list = [Gk[0].copy()]
  279. r = 0
  280. nb_updated_iam += 1
  281. print('the graph is updated by IAM', nb_updated_iam,
  282. 'times.')
  283. nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'),
  284. with_labels=True)
  285. ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
  286. plt.show()
  287. found_iam = True
  288. # when new distance is not smaller than the max of D_k, use random generation.
  289. if not found_iam:
  290. print('Distance not better, switching to random generation now.')
  291. print(str(dhat) + '->' + str(dhat_new))
  292. if InitRandomWithAllDk: # use all k nearest graphs as the initials.
  293. init_list = [g_init.copy() for g_init in Gk]
  294. else: # use just the nearest graph as the initial.
  295. init_list = [Gk[0].copy()]
  296. # number of edges to be changed.
  297. if len(init_list) == 1:
  298. # @todo what if the log is negetive? how to choose alpha (scalar)? seems fdgs is always 1.
  299. # fdgs = dhat_new
  300. fdgs = nb_updated_random + 1
  301. if fdgs < 1:
  302. fdgs = 1
  303. fdgs = int(np.ceil(np.log(fdgs)))
  304. if fdgs < 1:
  305. fdgs += 1
  306. # fdgs = nb_updated_random + 1 # @todo:
  307. fdgs_list = [fdgs]
  308. else:
  309. # @todo what if the log is negetive? how to choose alpha (scalar)?
  310. fdgs_list = np.array(dis_k[:])
  311. if np.min(fdgs_list) < 1:
  312. fdgs_list /= dis_k[0]
  313. fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
  314. if np.min(fdgs_list) < 1:
  315. fdgs_list = np.array(fdgs_list) + 1
  316. l = 0
  317. found_random = False
  318. while l < l_max and not found_random:
  319. for idx_g, g_tmp in enumerate(init_list):
  320. # add and delete edges.
  321. ghat_new = nx.convert_node_labels_to_integers(g_tmp.copy())
  322. # @todo: should we use just half of the adjacency matrix for undirected graphs?
  323. nb_vpairs = nx.number_of_nodes(ghat_new) * (nx.number_of_nodes(ghat_new) - 1)
  324. np.random.seed()
  325. # which edges to change.
  326. # @todo: what if fdgs is bigger than nb_vpairs?
  327. idx_change = random.sample(range(nb_vpairs), fdgs_list[idx_g] if
  328. fdgs_list[idx_g] < nb_vpairs else nb_vpairs)
  329. # idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
  330. # (nx.number_of_nodes(gs) - 1), fdgs)
  331. for item in idx_change:
  332. node1 = int(item / (nx.number_of_nodes(ghat_new) - 1))
  333. node2 = (item - node1 * (nx.number_of_nodes(ghat_new) - 1))
  334. if node2 >= node1: # skip the self pair.
  335. node2 += 1
  336. # @todo: is the randomness correct?
  337. if not ghat_new.has_edge(node1, node2):
  338. ghat_new.add_edge(node1, node2)
  339. # nx.draw_networkx(gs)
  340. # plt.show()
  341. # nx.draw_networkx(ghat_new)
  342. # plt.show()
  343. else:
  344. ghat_new.remove_edge(node1, node2)
  345. # nx.draw_networkx(gs)
  346. # plt.show()
  347. # nx.draw_networkx(ghat_new)
  348. # plt.show()
  349. # nx.draw_networkx(ghat_new)
  350. # plt.show()
  351. # compute distance between \psi and the new generated graph.
  352. knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False)
  353. dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1),
  354. alpha, knew, withterm3=False)
  355. # @todo: the new distance is smaller or also equal?
  356. if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
  357. # check if the new distance is the same as one in D_k.
  358. is_duplicate = False
  359. for dis_tmp in dis_k[1:-1]:
  360. if np.abs(dhat_new - dis_tmp) < epsilon:
  361. is_duplicate = True
  362. print('Random: duplicate k nearest graph generated.')
  363. break
  364. if not is_duplicate:
  365. if np.abs(dhat_new - dhat) < epsilon:
  366. print('Random: I am equal!')
  367. # dhat = dhat_new
  368. # ghat_list = [ghat_new.copy()]
  369. else:
  370. print('Random: we got better k nearest neighbors!')
  371. print('l =', str(l))
  372. nb_updated_k_random += 1
  373. print('the k nearest neighbors are updated by random generation',
  374. nb_updated_k_random, 'times.')
  375. dis_k = [dhat_new] + dis_k # add the new nearest distances.
  376. Gk = [ghat_new.copy()] + Gk # add the corresponding graphs.
  377. sort_idx = np.argsort(dis_k)
  378. dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  379. Gk = [Gk[idx] for idx in sort_idx[0:k]]
  380. if dhat_new < dhat:
  381. print('\nRandom: I am smaller!')
  382. print('l =', str(l))
  383. print(dhat, '->', dhat_new)
  384. dhat = dhat_new
  385. ghat_list = [ghat_new.copy()]
  386. r = 0
  387. nb_updated_random += 1
  388. print('the graph is updated by random generation',
  389. nb_updated_random, 'times.')
  390. nx.draw(ghat_new, labels=nx.get_node_attributes(ghat_new, 'atom'),
  391. with_labels=True)
  392. ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
  393. plt.show()
  394. found_random = True
  395. break
  396. l += 1
  397. if not found_random: # l == l_max:
  398. r += 1
  399. dis_of_each_itr.append(dhat)
  400. itr_total += 1
  401. print('\nthe k shortest distances are', dis_k)
  402. print('the shortest distances for previous iterations are', dis_of_each_itr)
  403. print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
  404. nb_updated_random, 'times.')
  405. print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam,
  406. 'times, and by random generation', nb_updated_k_random, 'times.')
  407. print('distances in kernel space:', dis_of_each_itr, '\n')
  408. return dhat, ghat_list, dis_of_each_itr[-1], \
  409. nb_updated_iam, nb_updated_random, nb_updated_k_iam, nb_updated_k_random
  410. ###############################################################################
  411. # Old implementations.
  412. #def gk_iam(Gn, alpha):
  413. # """This function constructs graph pre-image by the iterative pre-image
  414. # framework in reference [1], algorithm 1, where the step of generating new
  415. # graphs randomly is replaced by the IAM algorithm in reference [2].
  416. #
  417. # notes
  418. # -----
  419. # Every time a better graph is acquired, the older one is replaced by it.
  420. # """
  421. # pass
  422. # # compute k nearest neighbors of phi in DN.
  423. # dis_list = [] # distance between g_star and each graph.
  424. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  425. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  426. # k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
  427. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  428. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  429. # dis_list.append(dtemp)
  430. #
  431. # # sort
  432. # sort_idx = np.argsort(dis_list)
  433. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
  434. # g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  435. # if dis_gs[0] == 0: # the exact pre-image.
  436. # print('The exact pre-image is found from the input dataset.')
  437. # return 0, g0hat
  438. # dhat = dis_gs[0] # the nearest distance
  439. # Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
  440. # gihat_list = []
  441. #
  442. ## i = 1
  443. # r = 1
  444. # while r < r_max:
  445. # print('r =', r)
  446. ## found = False
  447. # Gs_nearest = Gk + gihat_list
  448. # g_tmp = iam(Gs_nearest)
  449. #
  450. # # compute distance between \psi and the new generated graph.
  451. # knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
  452. # p_quit=lmbda, n_iteration=20, remove_totters=False,
  453. # n_jobs=multiprocessing.cpu_count(), verbose=False)
  454. # dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
  455. # knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
  456. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  457. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  458. # if dnew <= dhat: # the new distance is smaller
  459. # print('I am smaller!')
  460. # dhat = dnew
  461. # g_new = g_tmp.copy() # found better graph.
  462. # gihat_list = [g_new]
  463. # dis_gs.append(dhat)
  464. # r = 0
  465. # else:
  466. # r += 1
  467. #
  468. # ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
  469. #
  470. # return dhat, ghat
  471. #def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  472. # """This function constructs graph pre-image by the iterative pre-image
  473. # framework in reference [1], algorithm 1, where the step of generating new
  474. # graphs randomly is replaced by the IAM algorithm in reference [2].
  475. #
  476. # notes
  477. # -----
  478. # Every time a better graph is acquired, its distance in kernel space is
  479. # compared with the k nearest ones, and the k nearest distances from the k+1
  480. # distances will be used as the new ones.
  481. # """
  482. # # compute k nearest neighbors of phi in DN.
  483. # dis_list = [] # distance between g_star and each graph.
  484. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  485. # dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  486. ## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  487. ## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  488. ## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  489. ## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  490. # dis_list.append(dtemp)
  491. #
  492. # # sort
  493. # sort_idx = np.argsort(dis_list)
  494. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  495. # g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  496. # if dis_gs[0] == 0: # the exact pre-image.
  497. # print('The exact pre-image is found from the input dataset.')
  498. # return 0, g0hat
  499. # dhat = dis_gs[0] # the nearest distance
  500. # ghat = g0hat.copy()
  501. # Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  502. # for gi in Gk:
  503. # nx.draw_networkx(gi)
  504. # plt.show()
  505. # print(gi.nodes(data=True))
  506. # print(gi.edges(data=True))
  507. # Gs_nearest = Gk.copy()
  508. ## gihat_list = []
  509. #
  510. ## i = 1
  511. # r = 1
  512. # while r < r_max:
  513. # print('r =', r)
  514. ## found = False
  515. ## Gs_nearest = Gk + gihat_list
  516. ## g_tmp = iam(Gs_nearest)
  517. # g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  518. # nx.draw_networkx(g_tmp)
  519. # plt.show()
  520. # print(g_tmp.nodes(data=True))
  521. # print(g_tmp.edges(data=True))
  522. #
  523. # # compute distance between \psi and the new generated graph.
  524. # gi_list = [Gn[i] for i in idx_gi]
  525. # knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
  526. # dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
  527. #
  528. ## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  529. ## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  530. ## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  531. ## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  532. # if dnew <= dhat and g_tmp != ghat: # the new distance is smaller
  533. # print('I am smaller!')
  534. # print(str(dhat) + '->' + str(dnew))
  535. ## nx.draw_networkx(ghat)
  536. ## plt.show()
  537. ## print('->')
  538. ## nx.draw_networkx(g_tmp)
  539. ## plt.show()
  540. #
  541. # dhat = dnew
  542. # g_new = g_tmp.copy() # found better graph.
  543. # ghat = g_tmp.copy()
  544. # dis_gs.append(dhat) # add the new nearest distance.
  545. # Gs_nearest.append(g_new) # add the corresponding graph.
  546. # sort_idx = np.argsort(dis_gs)
  547. # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  548. # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  549. # r = 0
  550. # else:
  551. # r += 1
  552. #
  553. # return dhat, ghat
  554. #def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  555. # """This function constructs graph pre-image by the iterative pre-image
  556. # framework in reference [1], algorithm 1, where the step of generating new
  557. # graphs randomly is replaced by the IAM algorithm in reference [2].
  558. #
  559. # notes
  560. # -----
  561. # Every time a set of n better graphs is acquired, their distances in kernel space are
  562. # compared with the k nearest ones, and the k nearest distances from the k+n
  563. # distances will be used as the new ones.
  564. # """
  565. # Gn_median = [Gn[idx].copy() for idx in idx_gi]
  566. # # compute k nearest neighbors of phi in DN.
  567. # dis_list = [] # distance between g_star and each graph.
  568. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  569. # dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  570. ## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  571. ## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  572. ## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  573. ## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  574. # dis_list.append(dtemp)
  575. #
  576. # # sort
  577. # sort_idx = np.argsort(dis_list)
  578. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  579. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  580. # g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  581. # if dis_gs[0] == 0: # the exact pre-image.
  582. # print('The exact pre-image is found from the input dataset.')
  583. # return 0, g0hat_list
  584. # dhat = dis_gs[0] # the nearest distance
  585. # ghat_list = [g.copy() for g in g0hat_list]
  586. # for g in ghat_list:
  587. # nx.draw_networkx(g)
  588. # plt.show()
  589. # print(g.nodes(data=True))
  590. # print(g.edges(data=True))
  591. # Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  592. # for gi in Gk:
  593. # nx.draw_networkx(gi)
  594. # plt.show()
  595. # print(gi.nodes(data=True))
  596. # print(gi.edges(data=True))
  597. # Gs_nearest = Gk.copy()
  598. ## gihat_list = []
  599. #
  600. ## i = 1
  601. # r = 1
  602. # while r < r_max:
  603. # print('r =', r)
  604. ## found = False
  605. ## Gs_nearest = Gk + gihat_list
  606. ## g_tmp = iam(Gs_nearest)
  607. # g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
  608. # Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  609. # for g in g_tmp_list:
  610. # nx.draw_networkx(g)
  611. # plt.show()
  612. # print(g.nodes(data=True))
  613. # print(g.edges(data=True))
  614. #
  615. # # compute distance between \psi and the new generated graphs.
  616. # gi_list = [Gn[i] for i in idx_gi]
  617. # knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
  618. # dnew_list = []
  619. # for idx, g_tmp in enumerate(g_tmp_list):
  620. # dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
  621. # len(g_tmp_list) + len(gi_list) + 1), alpha, knew))
  622. #
  623. ## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  624. ## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  625. ## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  626. ## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  627. #
  628. # # find the new k nearest graphs.
  629. # dis_gs = dnew_list + dis_gs # add the new nearest distances.
  630. # Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
  631. # sort_idx = np.argsort(dis_gs)
  632. # if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
  633. # print('We got better k nearest neighbors! Hurray!')
  634. # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  635. # print(dis_gs[-1])
  636. # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  637. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  638. # if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
  639. # print('I have smaller or equal distance!')
  640. # dhat = dis_gs[0]
  641. # print(str(dhat) + '->' + str(dhat))
  642. # idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
  643. # ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
  644. # for g in ghat_list:
  645. # nx.draw_networkx(g)
  646. # plt.show()
  647. # print(g.nodes(data=True))
  648. # print(g.edges(data=True))
  649. # r = 0
  650. # else:
  651. # r += 1
  652. #
  653. # return dhat, ghat_list

A Python package for graph kernels, graph edit distances and graph pre-image problem.