You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

gk_iam.py 19 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Apr 30 17:07:43 2019
  5. A graph pre-image method combining iterative pre-image method in reference [1]
  6. and the iterative alternate minimizations (IAM) in reference [2].
  7. @author: ljia
  8. @references:
  9. [1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to find graph
  10. pre-images. In Joint Pattern Recognition Symposium, pages 253-261. Springer, 2004.
  11. [2] Generalized median graph via iterative alternate minimization.
  12. """
  13. import sys
  14. import numpy as np
  15. import multiprocessing
  16. from tqdm import tqdm
  17. import networkx as nx
  18. import matplotlib.pyplot as plt
  19. from iam import iam, test_iam_with_more_graphs_as_init, test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
  20. sys.path.insert(0, "../")
  21. from pygraph.kernels.marginalizedKernel import marginalizedkernel
  22. from pygraph.kernels.untilHPathKernel import untilhpathkernel
  23. from pygraph.kernels.spKernel import spkernel
  24. import functools
  25. from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  26. from pygraph.kernels.structuralspKernel import structuralspkernel
  27. from median import draw_Letter_graph
  28. def gk_iam(Gn, alpha):
  29. """This function constructs graph pre-image by the iterative pre-image
  30. framework in reference [1], algorithm 1, where the step of generating new
  31. graphs randomly is replaced by the IAM algorithm in reference [2].
  32. notes
  33. -----
  34. Every time a better graph is acquired, the older one is replaced by it.
  35. """
  36. pass
  37. # # compute k nearest neighbors of phi in DN.
  38. # dis_list = [] # distance between g_star and each graph.
  39. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  40. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  41. # k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
  42. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  43. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  44. # dis_list.append(dtemp)
  45. #
  46. # # sort
  47. # sort_idx = np.argsort(dis_list)
  48. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
  49. # g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  50. # if dis_gs[0] == 0: # the exact pre-image.
  51. # print('The exact pre-image is found from the input dataset.')
  52. # return 0, g0hat
  53. # dhat = dis_gs[0] # the nearest distance
  54. # Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
  55. # gihat_list = []
  56. #
  57. ## i = 1
  58. # r = 1
  59. # while r < r_max:
  60. # print('r =', r)
  61. ## found = False
  62. # Gs_nearest = Gk + gihat_list
  63. # g_tmp = iam(Gs_nearest)
  64. #
  65. # # compute distance between phi and the new generated graph.
  66. # knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
  67. # p_quit=lmbda, n_iteration=20, remove_totters=False,
  68. # n_jobs=multiprocessing.cpu_count(), verbose=False)
  69. # dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
  70. # knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
  71. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  72. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  73. # if dnew <= dhat: # the new distance is smaller
  74. # print('I am smaller!')
  75. # dhat = dnew
  76. # g_new = g_tmp.copy() # found better graph.
  77. # gihat_list = [g_new]
  78. # dis_gs.append(dhat)
  79. # r = 0
  80. # else:
  81. # r += 1
  82. #
  83. # ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
  84. #
  85. # return dhat, ghat
  86. def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  87. """This function constructs graph pre-image by the iterative pre-image
  88. framework in reference [1], algorithm 1, where the step of generating new
  89. graphs randomly is replaced by the IAM algorithm in reference [2].
  90. notes
  91. -----
  92. Every time a better graph is acquired, its distance in kernel space is
  93. compared with the k nearest ones, and the k nearest distances from the k+1
  94. distances will be used as the new ones.
  95. """
  96. # compute k nearest neighbors of phi in DN.
  97. dis_list = [] # distance between g_star and each graph.
  98. for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  99. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  100. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  101. # k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  102. # (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  103. # k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  104. dis_list.append(dtemp)
  105. # sort
  106. sort_idx = np.argsort(dis_list)
  107. dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  108. g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  109. if dis_gs[0] == 0: # the exact pre-image.
  110. print('The exact pre-image is found from the input dataset.')
  111. return 0, g0hat
  112. dhat = dis_gs[0] # the nearest distance
  113. ghat = g0hat.copy()
  114. Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  115. for gi in Gk:
  116. nx.draw_networkx(gi)
  117. plt.show()
  118. print(gi.nodes(data=True))
  119. print(gi.edges(data=True))
  120. Gs_nearest = Gk.copy()
  121. # gihat_list = []
  122. # i = 1
  123. r = 1
  124. while r < r_max:
  125. print('r =', r)
  126. # found = False
  127. # Gs_nearest = Gk + gihat_list
  128. # g_tmp = iam(Gs_nearest)
  129. g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  130. nx.draw_networkx(g_tmp)
  131. plt.show()
  132. print(g_tmp.nodes(data=True))
  133. print(g_tmp.edges(data=True))
  134. # compute distance between phi and the new generated graph.
  135. gi_list = [Gn[i] for i in idx_gi]
  136. knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
  137. dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
  138. # dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  139. # knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  140. # alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  141. # k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  142. if dnew <= dhat and g_tmp != ghat: # the new distance is smaller
  143. print('I am smaller!')
  144. print(str(dhat) + '->' + str(dnew))
  145. # nx.draw_networkx(ghat)
  146. # plt.show()
  147. # print('->')
  148. # nx.draw_networkx(g_tmp)
  149. # plt.show()
  150. dhat = dnew
  151. g_new = g_tmp.copy() # found better graph.
  152. ghat = g_tmp.copy()
  153. dis_gs.append(dhat) # add the new nearest distance.
  154. Gs_nearest.append(g_new) # add the corresponding graph.
  155. sort_idx = np.argsort(dis_gs)
  156. dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  157. Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  158. r = 0
  159. else:
  160. r += 1
  161. return dhat, ghat
  162. #def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  163. # """This function constructs graph pre-image by the iterative pre-image
  164. # framework in reference [1], algorithm 1, where the step of generating new
  165. # graphs randomly is replaced by the IAM algorithm in reference [2].
  166. #
  167. # notes
  168. # -----
  169. # Every time a set of n better graphs is acquired, their distances in kernel space are
  170. # compared with the k nearest ones, and the k nearest distances from the k+n
  171. # distances will be used as the new ones.
  172. # """
  173. # Gn_median = [Gn[idx].copy() for idx in idx_gi]
  174. # # compute k nearest neighbors of phi in DN.
  175. # dis_list = [] # distance between g_star and each graph.
  176. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  177. # dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  178. ## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  179. ## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  180. ## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  181. ## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  182. # dis_list.append(dtemp)
  183. #
  184. # # sort
  185. # sort_idx = np.argsort(dis_list)
  186. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  187. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  188. # g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  189. # if dis_gs[0] == 0: # the exact pre-image.
  190. # print('The exact pre-image is found from the input dataset.')
  191. # return 0, g0hat_list
  192. # dhat = dis_gs[0] # the nearest distance
  193. # ghat_list = [g.copy() for g in g0hat_list]
  194. # for g in ghat_list:
  195. # nx.draw_networkx(g)
  196. # plt.show()
  197. # print(g.nodes(data=True))
  198. # print(g.edges(data=True))
  199. # Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  200. # for gi in Gk:
  201. # nx.draw_networkx(gi)
  202. # plt.show()
  203. # print(gi.nodes(data=True))
  204. # print(gi.edges(data=True))
  205. # Gs_nearest = Gk.copy()
  206. ## gihat_list = []
  207. #
  208. ## i = 1
  209. # r = 1
  210. # while r < r_max:
  211. # print('r =', r)
  212. ## found = False
  213. ## Gs_nearest = Gk + gihat_list
  214. ## g_tmp = iam(Gs_nearest)
  215. # g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
  216. # Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  217. # for g in g_tmp_list:
  218. # nx.draw_networkx(g)
  219. # plt.show()
  220. # print(g.nodes(data=True))
  221. # print(g.edges(data=True))
  222. #
  223. # # compute distance between phi and the new generated graphs.
  224. # gi_list = [Gn[i] for i in idx_gi]
  225. # knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
  226. # dnew_list = []
  227. # for idx, g_tmp in enumerate(g_tmp_list):
  228. # dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
  229. # len(g_tmp_list) + len(gi_list) + 1), alpha, knew))
  230. #
  231. ## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  232. ## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  233. ## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  234. ## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  235. #
  236. # # find the new k nearest graphs.
  237. # dis_gs = dnew_list + dis_gs # add the new nearest distances.
  238. # Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
  239. # sort_idx = np.argsort(dis_gs)
  240. # if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
  241. # print('We got better k nearest neighbors! Hurray!')
  242. # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  243. # print(dis_gs[-1])
  244. # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  245. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  246. # if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
  247. # print('I have smaller or equal distance!')
  248. # dhat = dis_gs[0]
  249. # print(str(dhat) + '->' + str(dhat))
  250. # idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
  251. # ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
  252. # for g in ghat_list:
  253. # nx.draw_networkx(g)
  254. # plt.show()
  255. # print(g.nodes(data=True))
  256. # print(g.edges(data=True))
  257. # r = 0
  258. # else:
  259. # r += 1
  260. #
  261. # return dhat, ghat_list
  262. def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, gkernel):
  263. """This function constructs graph pre-image by the iterative pre-image
  264. framework in reference [1], algorithm 1, where the step of generating new
  265. graphs randomly is replaced by the IAM algorithm in reference [2].
  266. notes
  267. -----
  268. Every time a set of n better graphs is acquired, their distances in kernel space are
  269. compared with the k nearest ones, and the k nearest distances from the k+n
  270. distances will be used as the new ones.
  271. """
  272. # compute k nearest neighbors of phi in DN.
  273. dis_list = [] # distance between g_star and each graph.
  274. term3 = 0
  275. for i1, a1 in enumerate(alpha):
  276. for i2, a2 in enumerate(alpha):
  277. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  278. for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
  279. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
  280. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  281. # k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  282. # (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  283. # k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  284. dis_list.append(dtemp)
  285. # sort
  286. sort_idx = np.argsort(dis_list)
  287. dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  288. nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  289. g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  290. if dis_gs[0] == 0: # the exact pre-image.
  291. print('The exact pre-image is found from the input dataset.')
  292. return 0, g0hat_list
  293. dhat = dis_gs[0] # the nearest distance
  294. ghat_list = [g.copy() for g in g0hat_list]
  295. for g in ghat_list:
  296. draw_Letter_graph(g)
  297. # nx.draw_networkx(g)
  298. # plt.show()
  299. print(g.nodes(data=True))
  300. print(g.edges(data=True))
  301. Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  302. for gi in Gk:
  303. # nx.draw_networkx(gi)
  304. # plt.show()
  305. draw_Letter_graph(g)
  306. print(gi.nodes(data=True))
  307. print(gi.edges(data=True))
  308. Gs_nearest = Gk.copy()
  309. # gihat_list = []
  310. # i = 1
  311. r = 1
  312. while r < r_max:
  313. print('r =', r)
  314. # found = False
  315. # Gs_nearest = Gk + gihat_list
  316. # g_tmp = iam(Gs_nearest)
  317. g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
  318. Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  319. for g in g_tmp_list:
  320. # nx.draw_networkx(g)
  321. # plt.show()
  322. draw_Letter_graph(g)
  323. print(g.nodes(data=True))
  324. print(g.edges(data=True))
  325. # compute distance between phi and the new generated graphs.
  326. knew = compute_kernel(g_tmp_list + Gn_median, gkernel, False)
  327. dnew_list = []
  328. for idx, g_tmp in enumerate(g_tmp_list):
  329. dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
  330. len(g_tmp_list) + len(Gn_median) + 1), alpha, knew,
  331. withterm3=False))
  332. # dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  333. # knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  334. # alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  335. # k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  336. # find the new k nearest graphs.
  337. dis_gs = dnew_list + dis_gs # add the new nearest distances.
  338. Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
  339. sort_idx = np.argsort(dis_gs)
  340. if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
  341. print('We got better k nearest neighbors! Hurray!')
  342. dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  343. print(dis_gs[-1])
  344. Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  345. nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  346. if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
  347. print('I have smaller or equal distance!')
  348. print(str(dhat) + '->' + str(dis_gs[0]))
  349. dhat = dis_gs[0]
  350. idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
  351. ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
  352. for g in ghat_list:
  353. # nx.draw_networkx(g)
  354. # plt.show()
  355. draw_Letter_graph(g)
  356. print(g.nodes(data=True))
  357. print(g.edges(data=True))
  358. r = 0
  359. else:
  360. r += 1
  361. return dhat, ghat_list
  362. def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
  363. term1 = Kmatrix[idx_g, idx_g]
  364. term2 = 0
  365. for i, a in enumerate(alpha):
  366. term2 += a * Kmatrix[idx_g, idx_gi[i]]
  367. term2 *= 2
  368. if withterm3 == False:
  369. for i1, a1 in enumerate(alpha):
  370. for i2, a2 in enumerate(alpha):
  371. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  372. return np.sqrt(term1 - term2 + term3)
  373. def compute_kernel(Gn, graph_kernel, verbose):
  374. if graph_kernel == 'marginalizedkernel':
  375. Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
  376. p_quit=0.03, n_iteration=20, remove_totters=False,
  377. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  378. elif graph_kernel == 'untilhpathkernel':
  379. Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label='bond_type',
  380. depth=10, k_func='MinMax', compute_method='trie',
  381. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  382. elif graph_kernel == 'spkernel':
  383. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  384. Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
  385. {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
  386. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  387. elif graph_kernel == 'structuralspkernel':
  388. mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
  389. Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
  390. {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
  391. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  392. # normalization
  393. Kmatrix_diag = Kmatrix.diagonal().copy()
  394. for i in range(len(Kmatrix)):
  395. for j in range(i, len(Kmatrix)):
  396. Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
  397. Kmatrix[j][i] = Kmatrix[i][j]
  398. return Kmatrix
  399. def gram2distances(Kmatrix):
  400. dmatrix = np.zeros((len(Kmatrix), len(Kmatrix)))
  401. for i1 in range(len(Kmatrix)):
  402. for i2 in range(len(Kmatrix)):
  403. dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2]
  404. dmatrix = np.sqrt(dmatrix)
  405. return dmatrix

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.