You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

gk_iam.py 35 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Apr 30 17:07:43 2019
  5. A graph pre-image method combining iterative pre-image method in reference [1]
  6. and the iterative alternate minimizations (IAM) in reference [2].
  7. @author: ljia
  8. @references:
  9. [1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to find graph
  10. pre-images. In Joint Pattern Recognition Symposium, pages 253-261. Springer, 2004.
  11. [2] Generalized median graph via iterative alternate minimization.
  12. """
  13. import sys
  14. import numpy as np
  15. import multiprocessing
  16. from tqdm import tqdm
  17. import networkx as nx
  18. import matplotlib.pyplot as plt
  19. import random
  20. import matplotlib.pyplot as plt
  21. from iam import iam, test_iam_with_more_graphs_as_init, iam_moreGraphsAsInit_tryAllPossibleBestGraphs
  22. sys.path.insert(0, "../")
  23. from pygraph.kernels.marginalizedKernel import marginalizedkernel
  24. from pygraph.kernels.untilHPathKernel import untilhpathkernel
  25. from pygraph.kernels.spKernel import spkernel
  26. import functools
  27. from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  28. from pygraph.kernels.structuralspKernel import structuralspkernel
  29. from median import draw_Letter_graph
  30. def gk_iam(Gn, alpha):
  31. """This function constructs graph pre-image by the iterative pre-image
  32. framework in reference [1], algorithm 1, where the step of generating new
  33. graphs randomly is replaced by the IAM algorithm in reference [2].
  34. notes
  35. -----
  36. Every time a better graph is acquired, the older one is replaced by it.
  37. """
  38. pass
  39. # # compute k nearest neighbors of phi in DN.
  40. # dis_list = [] # distance between g_star and each graph.
  41. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  42. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  43. # k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
  44. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  45. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  46. # dis_list.append(dtemp)
  47. #
  48. # # sort
  49. # sort_idx = np.argsort(dis_list)
  50. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
  51. # g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  52. # if dis_gs[0] == 0: # the exact pre-image.
  53. # print('The exact pre-image is found from the input dataset.')
  54. # return 0, g0hat
  55. # dhat = dis_gs[0] # the nearest distance
  56. # Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
  57. # gihat_list = []
  58. #
  59. ## i = 1
  60. # r = 1
  61. # while r < r_max:
  62. # print('r =', r)
  63. ## found = False
  64. # Gs_nearest = Gk + gihat_list
  65. # g_tmp = iam(Gs_nearest)
  66. #
  67. # # compute distance between \psi and the new generated graph.
  68. # knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
  69. # p_quit=lmbda, n_iteration=20, remove_totters=False,
  70. # n_jobs=multiprocessing.cpu_count(), verbose=False)
  71. # dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
  72. # knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
  73. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  74. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  75. # if dnew <= dhat: # the new distance is smaller
  76. # print('I am smaller!')
  77. # dhat = dnew
  78. # g_new = g_tmp.copy() # found better graph.
  79. # gihat_list = [g_new]
  80. # dis_gs.append(dhat)
  81. # r = 0
  82. # else:
  83. # r += 1
  84. #
  85. # ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
  86. #
  87. # return dhat, ghat
  88. def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  89. """This function constructs graph pre-image by the iterative pre-image
  90. framework in reference [1], algorithm 1, where the step of generating new
  91. graphs randomly is replaced by the IAM algorithm in reference [2].
  92. notes
  93. -----
  94. Every time a better graph is acquired, its distance in kernel space is
  95. compared with the k nearest ones, and the k nearest distances from the k+1
  96. distances will be used as the new ones.
  97. """
  98. # compute k nearest neighbors of phi in DN.
  99. dis_list = [] # distance between g_star and each graph.
  100. for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  101. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  102. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  103. # k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  104. # (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  105. # k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  106. dis_list.append(dtemp)
  107. # sort
  108. sort_idx = np.argsort(dis_list)
  109. dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  110. g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  111. if dis_gs[0] == 0: # the exact pre-image.
  112. print('The exact pre-image is found from the input dataset.')
  113. return 0, g0hat
  114. dhat = dis_gs[0] # the nearest distance
  115. ghat = g0hat.copy()
  116. Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  117. for gi in Gk:
  118. nx.draw_networkx(gi)
  119. plt.show()
  120. print(gi.nodes(data=True))
  121. print(gi.edges(data=True))
  122. Gs_nearest = Gk.copy()
  123. # gihat_list = []
  124. # i = 1
  125. r = 1
  126. while r < r_max:
  127. print('r =', r)
  128. # found = False
  129. # Gs_nearest = Gk + gihat_list
  130. # g_tmp = iam(Gs_nearest)
  131. g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  132. nx.draw_networkx(g_tmp)
  133. plt.show()
  134. print(g_tmp.nodes(data=True))
  135. print(g_tmp.edges(data=True))
  136. # compute distance between \psi and the new generated graph.
  137. gi_list = [Gn[i] for i in idx_gi]
  138. knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
  139. dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
  140. # dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  141. # knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  142. # alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  143. # k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  144. if dnew <= dhat and g_tmp != ghat: # the new distance is smaller
  145. print('I am smaller!')
  146. print(str(dhat) + '->' + str(dnew))
  147. # nx.draw_networkx(ghat)
  148. # plt.show()
  149. # print('->')
  150. # nx.draw_networkx(g_tmp)
  151. # plt.show()
  152. dhat = dnew
  153. g_new = g_tmp.copy() # found better graph.
  154. ghat = g_tmp.copy()
  155. dis_gs.append(dhat) # add the new nearest distance.
  156. Gs_nearest.append(g_new) # add the corresponding graph.
  157. sort_idx = np.argsort(dis_gs)
  158. dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  159. Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  160. r = 0
  161. else:
  162. r += 1
  163. return dhat, ghat
  164. #def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  165. # """This function constructs graph pre-image by the iterative pre-image
  166. # framework in reference [1], algorithm 1, where the step of generating new
  167. # graphs randomly is replaced by the IAM algorithm in reference [2].
  168. #
  169. # notes
  170. # -----
  171. # Every time a set of n better graphs is acquired, their distances in kernel space are
  172. # compared with the k nearest ones, and the k nearest distances from the k+n
  173. # distances will be used as the new ones.
  174. # """
  175. # Gn_median = [Gn[idx].copy() for idx in idx_gi]
  176. # # compute k nearest neighbors of phi in DN.
  177. # dis_list = [] # distance between g_star and each graph.
  178. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  179. # dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  180. ## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  181. ## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  182. ## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  183. ## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  184. # dis_list.append(dtemp)
  185. #
  186. # # sort
  187. # sort_idx = np.argsort(dis_list)
  188. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  189. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  190. # g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  191. # if dis_gs[0] == 0: # the exact pre-image.
  192. # print('The exact pre-image is found from the input dataset.')
  193. # return 0, g0hat_list
  194. # dhat = dis_gs[0] # the nearest distance
  195. # ghat_list = [g.copy() for g in g0hat_list]
  196. # for g in ghat_list:
  197. # nx.draw_networkx(g)
  198. # plt.show()
  199. # print(g.nodes(data=True))
  200. # print(g.edges(data=True))
  201. # Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  202. # for gi in Gk:
  203. # nx.draw_networkx(gi)
  204. # plt.show()
  205. # print(gi.nodes(data=True))
  206. # print(gi.edges(data=True))
  207. # Gs_nearest = Gk.copy()
  208. ## gihat_list = []
  209. #
  210. ## i = 1
  211. # r = 1
  212. # while r < r_max:
  213. # print('r =', r)
  214. ## found = False
  215. ## Gs_nearest = Gk + gihat_list
  216. ## g_tmp = iam(Gs_nearest)
  217. # g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
  218. # Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  219. # for g in g_tmp_list:
  220. # nx.draw_networkx(g)
  221. # plt.show()
  222. # print(g.nodes(data=True))
  223. # print(g.edges(data=True))
  224. #
  225. # # compute distance between \psi and the new generated graphs.
  226. # gi_list = [Gn[i] for i in idx_gi]
  227. # knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
  228. # dnew_list = []
  229. # for idx, g_tmp in enumerate(g_tmp_list):
  230. # dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
  231. # len(g_tmp_list) + len(gi_list) + 1), alpha, knew))
  232. #
  233. ## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  234. ## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  235. ## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  236. ## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  237. #
  238. # # find the new k nearest graphs.
  239. # dis_gs = dnew_list + dis_gs # add the new nearest distances.
  240. # Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
  241. # sort_idx = np.argsort(dis_gs)
  242. # if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
  243. # print('We got better k nearest neighbors! Hurray!')
  244. # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  245. # print(dis_gs[-1])
  246. # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  247. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  248. # if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
  249. # print('I have smaller or equal distance!')
  250. # dhat = dis_gs[0]
  251. # print(str(dhat) + '->' + str(dhat))
  252. # idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
  253. # ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
  254. # for g in ghat_list:
  255. # nx.draw_networkx(g)
  256. # plt.show()
  257. # print(g.nodes(data=True))
  258. # print(g.edges(data=True))
  259. # r = 0
  260. # else:
  261. # r += 1
  262. #
  263. # return dhat, ghat_list
  264. def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
  265. gkernel, epsilon=0.001,
  266. params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
  267. 'ite_max': 50, 'epsilon': 0.001,
  268. 'removeNodes': True, 'connected': False},
  269. params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
  270. 'saveGXL': 'benoit'}):
  271. """This function constructs graph pre-image by the iterative pre-image
  272. framework in reference [1], algorithm 1, where the step of generating new
  273. graphs randomly is replaced by the IAM algorithm in reference [2].
  274. notes
  275. -----
  276. Every time a set of n better graphs is acquired, their distances in kernel space are
  277. compared with the k nearest ones, and the k nearest distances from the k+n
  278. distances will be used as the new ones.
  279. """
  280. # compute k nearest neighbors of phi in DN.
  281. dis_list = [] # distance between g_star and each graph.
  282. term3 = 0
  283. for i1, a1 in enumerate(alpha):
  284. for i2, a2 in enumerate(alpha):
  285. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  286. for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
  287. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
  288. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  289. # k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  290. # (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  291. # k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  292. dis_list.append(dtemp)
  293. # sort
  294. sort_idx = np.argsort(dis_list)
  295. dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  296. nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  297. g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  298. if dis_gs[0] == 0: # the exact pre-image.
  299. print('The exact pre-image is found from the input dataset.')
  300. return 0, g0hat_list, 0, 0
  301. dhat = dis_gs[0] # the nearest distance
  302. ghat_list = [g.copy() for g in g0hat_list]
  303. # for g in ghat_list:
  304. # draw_Letter_graph(g)
  305. # nx.draw_networkx(g)
  306. # plt.show()
  307. # print(g.nodes(data=True))
  308. # print(g.edges(data=True))
  309. Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  310. for gi in Gk:
  311. nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
  312. # nx.draw_networkx(gi)
  313. plt.show()
  314. # draw_Letter_graph(g)
  315. print(gi.nodes(data=True))
  316. print(gi.edges(data=True))
  317. Gs_nearest = [g.copy() for g in Gk]
  318. Gn_nearest_median = [g.copy() for g in Gs_nearest]
  319. # gihat_list = []
  320. # i = 1
  321. r = 0
  322. itr_total = 0
  323. # cur_dis = dhat
  324. # old_dis = cur_dis * 2
  325. dis_list = [dhat]
  326. found = False
  327. nb_updated = 0
  328. while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
  329. print('\nCurrent preimage iteration =', r)
  330. print('Total preimage iteration =', itr_total, '\n')
  331. found = False
  332. # Gs_nearest = Gk + gihat_list
  333. # g_tmp = iam(Gs_nearest)
  334. g_tmp_list, _ = iam_moreGraphsAsInit_tryAllPossibleBestGraphs(
  335. Gn_nearest_median, Gs_nearest, params_ged=params_ged, **params_iam)
  336. # for g in g_tmp_list:
  337. # nx.draw_networkx(g)
  338. # plt.show()
  339. # draw_Letter_graph(g)
  340. # print(g.nodes(data=True))
  341. # print(g.edges(data=True))
  342. # compute distance between \psi and the new generated graphs.
  343. knew = compute_kernel(g_tmp_list + Gn_median, gkernel, False)
  344. dnew_list = []
  345. for idx, g_tmp in enumerate(g_tmp_list):
  346. # @todo: the term3 below could use the one at the beginning of the function.
  347. dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
  348. len(g_tmp_list) + len(Gn_median) + 1),
  349. alpha, knew, withterm3=False))
  350. # dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  351. # knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  352. # alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  353. # k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  354. # # find the new k nearest graphs.
  355. # dnew_best = min(dnew_list)
  356. # dis_gs = dnew_list + dis_gs # add the new nearest distances.
  357. # Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
  358. # sort_idx = np.argsort(dis_gs)
  359. # if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
  360. # print('We got new k nearest neighbors! Hurray!')
  361. # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  362. ## print(dis_gs[-1])
  363. # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  364. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  365. # if dnew_best < dhat and np.abs(dnew_best - dhat) > epsilon:
  366. # print('I have smaller distance!')
  367. # print(str(dhat) + '->' + str(dis_gs[0]))
  368. # dhat = dis_gs[0]
  369. # idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
  370. # ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
  371. ## for g in ghat_list:
  372. ### nx.draw_networkx(g)
  373. ### plt.show()
  374. ## draw_Letter_graph(g)
  375. ## print(g.nodes(data=True))
  376. ## print(g.edges(data=True))
  377. # r = 0
  378. # found = True
  379. # nb_updated += 1
  380. # elif np.abs(dnew_best - dhat) < epsilon:
  381. # print('I have almost equal distance!')
  382. # print(str(dhat) + '->' + str(dnew_best))
  383. # else:
  384. # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]
  385. # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  386. # Gn_nearest_median = [g.copy() for g in Gs_nearest]
  387. # if not found:
  388. # r += 1
  389. # find the new k nearest graphs.
  390. dnew_best = min(dnew_list)
  391. if np.abs(dnew_best - dhat) >= epsilon:
  392. dis_gs = dnew_list + dis_gs # add the new nearest distances.
  393. Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
  394. sort_idx = np.argsort(dis_gs)
  395. else: # if the new distance is equal to the old one.
  396. # @todo: works if only one graph is generated.
  397. Gs_nearest[0] = g_tmp_list[0].copy()
  398. sort_idx = np.argsort(dis_gs)
  399. if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
  400. print('We got new k nearest neighbors! Hurray!')
  401. dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  402. # print(dis_gs[-1])
  403. Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  404. nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  405. if dnew_best < dhat and np.abs(dnew_best - dhat) >= epsilon:
  406. print('I have smaller distance!')
  407. print(str(dhat) + '->' + str(dis_gs[0]))
  408. dhat = dis_gs[0]
  409. idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
  410. ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
  411. # for g in ghat_list:
  412. ## nx.draw_networkx(g)
  413. ## plt.show()
  414. # draw_Letter_graph(g)
  415. # print(g.nodes(data=True))
  416. # print(g.edges(data=True))
  417. r = 0
  418. found = True
  419. nb_updated += 1
  420. elif np.abs(dnew_best - dhat) < epsilon:
  421. print('I have almost equal distance!')
  422. print(str(dhat) + '->' + str(dnew_best))
  423. else:
  424. dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]
  425. Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  426. Gn_nearest_median = [g.copy() for g in Gs_nearest]
  427. if not found:
  428. r += 1
  429. # old_dis = cur_dis
  430. # cur_dis = dnew_best
  431. dis_list.append(dhat)
  432. itr_total += 1
  433. print('\nthe graph is updated', nb_updated, 'times.')
  434. print('distances in kernel space:', dis_list, '\n')
  435. return dhat, ghat_list, dis_list[-1], nb_updated
  436. def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
  437. l_max, gkernel, epsilon=0.001,
  438. params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1,
  439. 'ite_max': 50, 'epsilon': 0.001,
  440. 'removeNodes': True, 'connected': False},
  441. params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
  442. 'saveGXL': 'benoit'}):
  443. """This function constructs graph pre-image by the iterative pre-image
  444. framework in reference [1], algorithm 1, where new graphs are generated
  445. randomly and by the IAM algorithm in reference [2].
  446. notes
  447. -----
  448. Every time a set of n better graphs is acquired, their distances in kernel space are
  449. compared with the k nearest ones, and the k nearest distances from the k+n
  450. distances will be used as the new ones.
  451. """
  452. Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
  453. # compute k nearest neighbors of phi in DN.
  454. dis_list = [] # distance between g_star and each graph.
  455. term3 = 0
  456. for i1, a1 in enumerate(alpha):
  457. for i2, a2 in enumerate(alpha):
  458. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  459. for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
  460. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
  461. dis_list.append(dtemp)
  462. # sort
  463. sort_idx = np.argsort(dis_list)
  464. dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  465. nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  466. g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  467. if dis_gs[0] == 0: # the exact pre-image.
  468. print('The exact pre-image is found from the input dataset.')
  469. return 0, g0hat_list, 0, 0
  470. dhat = dis_gs[0] # the nearest distance
  471. ghat_list = [g.copy() for g in g0hat_list]
  472. # for g in ghat_list:
  473. # draw_Letter_graph(g)
  474. # nx.draw_networkx(g)
  475. # plt.show()
  476. # print(g.nodes(data=True))
  477. # print(g.edges(data=True))
  478. Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  479. for gi in Gk:
  480. nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
  481. # nx.draw_networkx(gi)
  482. plt.show()
  483. # draw_Letter_graph(g)
  484. print(gi.nodes(data=True))
  485. print(gi.edges(data=True))
  486. Gs_nearest = [g.copy() for g in Gk]
  487. Gn_nearest_median = [g.copy() for g in Gs_nearest]
  488. # gihat_list = []
  489. # i = 1
  490. r = 0
  491. itr_total = 0
  492. # cur_dis = dhat
  493. # old_dis = cur_dis * 2
  494. dis_list = [dhat]
  495. found = False
  496. nb_updated_iam = 0
  497. nb_updated_random = 0
  498. while r < r_max: # and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
  499. print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
  500. print('Current preimage iteration =', r)
  501. print('Total preimage iteration =', itr_total, '\n')
  502. found = False
  503. # Gs_nearest = Gk + gihat_list
  504. # g_tmp = iam(Gs_nearest)
  505. g_tmp_list, _ = iam_moreGraphsAsInit_tryAllPossibleBestGraphs(
  506. Gn_nearest_median, Gs_nearest, params_ged=params_ged, **params_iam)
  507. # for g in g_tmp_list:
  508. # nx.draw_networkx(g)
  509. # plt.show()
  510. # draw_Letter_graph(g)
  511. # print(g.nodes(data=True))
  512. # print(g.edges(data=True))
  513. # compute distance between \psi and the new generated graphs.
  514. knew = compute_kernel(g_tmp_list + Gn_median, gkernel, False)
  515. dnew_list = []
  516. for idx, g_tmp in enumerate(g_tmp_list):
  517. # @todo: the term3 below could use the one at the beginning of the function.
  518. dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
  519. len(g_tmp_list) + len(Gn_median) + 1),
  520. alpha, knew, withterm3=False))
  521. # find the new k nearest graphs.
  522. # @todo: for now only consider the situation when only one graph is generated by IAM.
  523. dnew_best = min(dnew_list)
  524. gnew_best = g_tmp_list[0].copy()
  525. # when new distance is equal to the old one, use random generation.
  526. if np.abs(dnew_best - dhat) < epsilon or dhat < dnew_best:
  527. # Gs_nearest[0] = g_tmp_list[0].copy()
  528. # sort_idx = np.argsort(dis_gs)
  529. print('Distance almost equal or worse, switching to random generation now.')
  530. print(str(dhat) + '->' + str(dnew_best))
  531. if dnew_best > dhat and np.abs(dnew_best - dhat) >= epsilon:
  532. dnew_best = dhat
  533. gnew_best = Gs_nearest[0].copy()
  534. # number of edges to be changed.
  535. # @todo what if the log is negetive? how to choose alpha (scalar)? seems fdgs is always 1.
  536. # fdgs = dnew_best
  537. fdgs = nb_updated_random + 1
  538. if fdgs < 1:
  539. fdgs = 1
  540. fdgs = int(np.ceil(np.log(fdgs)))
  541. if fdgs < 1:
  542. fdgs += 1
  543. # fdgs = nb_updated_random + 1 # @todo:
  544. # @todo: should we use just half of the adjacency matrix for undirected graphs?
  545. nb_vpairs = nx.number_of_nodes(gnew_best) * (nx.number_of_nodes(gnew_best) - 1)
  546. l = 0
  547. while l < l_max:
  548. # add and delete edges.
  549. gtemp = gnew_best.copy()
  550. np.random.seed()
  551. # which edges to change.
  552. # @todo: what if fdgs is bigger than nb_vpairs?
  553. idx_change = random.sample(range(nb_vpairs), fdgs if
  554. fdgs < nb_vpairs else nb_vpairs)
  555. # idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
  556. # (nx.number_of_nodes(gs) - 1), fdgs)
  557. for item in idx_change:
  558. node1 = int(item / (nx.number_of_nodes(gtemp) - 1))
  559. node2 = (item - node1 * (nx.number_of_nodes(gtemp) - 1))
  560. if node2 >= node1: # skip the self pair.
  561. node2 += 1
  562. # @todo: is the randomness correct?
  563. if not gtemp.has_edge(node1, node2):
  564. gtemp.add_edge(node1, node2)
  565. # nx.draw_networkx(gs)
  566. # plt.show()
  567. # nx.draw_networkx(gtemp)
  568. # plt.show()
  569. else:
  570. gtemp.remove_edge(node1, node2)
  571. # nx.draw_networkx(gs)
  572. # plt.show()
  573. # nx.draw_networkx(gtemp)
  574. # plt.show()
  575. # nx.draw_networkx(gtemp)
  576. # plt.show()
  577. # compute distance between \psi and the new generated graph.
  578. knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
  579. dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
  580. # @todo: the new distance is smaller or also equal?
  581. if dnew < dnew_best or np.abs(dnew_best - dnew) < epsilon:
  582. if np.abs(dnew_best - dnew) < epsilon:
  583. print('I am equal!')
  584. dnew_best = dnew
  585. gnew_best = gtemp.copy()
  586. else:
  587. print('\nI am smaller!')
  588. print('l =', str(l))
  589. print(dnew_best, '->', dnew)
  590. dis_gs = [dnew] + dis_gs # add the new nearest distances.
  591. Gs_nearest = [gtemp.copy()] + Gs_nearest # add the corresponding graphs.
  592. sort_idx = np.argsort(dis_gs)
  593. dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  594. Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  595. Gn_nearest_median = [g.copy() for g in Gs_nearest]
  596. dhat = dnew
  597. nb_updated_random += 1
  598. found = True # found better graph.
  599. r = 0
  600. print('the graph is updated by random generation',
  601. nb_updated_random, 'times.')
  602. nx.draw(gtemp, labels=nx.get_node_attributes(gtemp, 'atom'),
  603. with_labels=True)
  604. ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
  605. plt.show()
  606. break
  607. # nx.draw_networkx(gtemp)
  608. # plt.show()
  609. # print(gtemp.nodes(data=True))
  610. # print(gtemp.edges(data=True))
  611. l += 1
  612. if l == l_max:
  613. r += 1
  614. else: # if the new distance is not equal to the old one.
  615. dis_gs = dnew_list + dis_gs # add the new nearest distances.
  616. Gs_nearest = [nx.convert_node_labels_to_integers(g).copy() for g
  617. in g_tmp_list] + Gs_nearest # add the corresponding graphs.
  618. sort_idx = np.argsort(dis_gs)
  619. if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
  620. print('We got new k nearest neighbors! Hurray!')
  621. dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  622. # print(dis_gs[-1])
  623. Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  624. nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  625. if dnew_best < dhat:
  626. print('I have smaller distance!')
  627. print(str(dhat) + '->' + str(dis_gs[0]))
  628. dhat = dis_gs[0]
  629. idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
  630. ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
  631. # for g in ghat_list:
  632. ## nx.draw_networkx(g)
  633. ## plt.show()
  634. # draw_Letter_graph(g)
  635. # print(g.nodes(data=True))
  636. # print(g.edges(data=True))
  637. r = 0
  638. found = True
  639. nb_updated_iam += 1
  640. print('the graph is updated by IAM', nb_updated_iam, 'times.')
  641. nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
  642. with_labels=True)
  643. ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
  644. plt.show()
  645. else:
  646. dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]]
  647. Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  648. Gn_nearest_median = [g.copy() for g in Gs_nearest]
  649. if not found:
  650. r += 1
  651. # old_dis = cur_dis
  652. # cur_dis = dnew_best
  653. dis_list.append(dhat)
  654. itr_total += 1
  655. print('\nthe k shortest distances are', dis_gs)
  656. print('the shortest distances for previous iterations are', dis_list)
  657. print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
  658. nb_updated_random, 'times.')
  659. print('distances in kernel space:', dis_list, '\n')
  660. return dhat, ghat_list, dis_list[-1], nb_updated_iam, nb_updated_random
  661. ###############################################################################
  662. # useful functions.
  663. def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
  664. term1 = Kmatrix[idx_g, idx_g]
  665. term2 = 0
  666. for i, a in enumerate(alpha):
  667. term2 += a * Kmatrix[idx_g, idx_gi[i]]
  668. term2 *= 2
  669. if withterm3 == False:
  670. for i1, a1 in enumerate(alpha):
  671. for i2, a2 in enumerate(alpha):
  672. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  673. return np.sqrt(term1 - term2 + term3)
  674. def compute_kernel(Gn, graph_kernel, verbose):
  675. if graph_kernel == 'marginalizedkernel':
  676. Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
  677. p_quit=0.03, n_iteration=10, remove_totters=False,
  678. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  679. elif graph_kernel == 'untilhpathkernel':
  680. Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
  681. depth=10, k_func='MinMax', compute_method='trie',
  682. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  683. elif graph_kernel == 'spkernel':
  684. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  685. Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
  686. {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
  687. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  688. elif graph_kernel == 'structuralspkernel':
  689. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  690. Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
  691. {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
  692. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  693. # normalization
  694. Kmatrix_diag = Kmatrix.diagonal().copy()
  695. for i in range(len(Kmatrix)):
  696. for j in range(i, len(Kmatrix)):
  697. Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
  698. Kmatrix[j][i] = Kmatrix[i][j]
  699. return Kmatrix
  700. def gram2distances(Kmatrix):
  701. dmatrix = np.zeros((len(Kmatrix), len(Kmatrix)))
  702. for i1 in range(len(Kmatrix)):
  703. for i2 in range(len(Kmatrix)):
  704. dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2]
  705. dmatrix = np.sqrt(dmatrix)
  706. return dmatrix

A Python package for graph kernels, graph edit distances and graph pre-image problem.