You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

preimage_iam.py 36 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Apr 30 17:07:43 2019
  5. A graph pre-image method combining iterative pre-image method in reference [1]
  6. and the iterative alternate minimizations (IAM) in reference [2].
  7. @author: ljia
  8. @references:
  9. [1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to find graph
  10. pre-images. In Joint Pattern Recognition Symposium, pages 253-261. Springer, 2004.
  11. [2] Generalized median graph via iterative alternate minimization.
  12. """
  13. import sys
  14. import numpy as np
  15. import multiprocessing
  16. from tqdm import tqdm
  17. import networkx as nx
  18. import matplotlib.pyplot as plt
  19. import random
  20. from iam import iam_upgraded
  21. sys.path.insert(0, "../")
  22. from pygraph.kernels.marginalizedKernel import marginalizedkernel
  23. from pygraph.kernels.untilHPathKernel import untilhpathkernel
  24. from pygraph.kernels.spKernel import spkernel
  25. import functools
  26. from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
  27. from pygraph.kernels.structuralspKernel import structuralspkernel
  def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
                   gkernel, epsilon=0.001, InitIAMWithAllDk=False,
                   params_iam=None, params_ged=None):
      """Construct a graph pre-image with the iterative pre-image framework.

      This follows reference [1], algorithm 1, where the step of generating
      new graphs randomly is replaced by the IAM algorithm in reference [2].

      Parameters
      ----------
      Gn_init : list of graphs
          Candidate graphs; graph ``i`` is looked up at row/column ``i`` of
          ``Kmatrix`` and is copied with ``.copy()``.
      Gn_median : list of graphs
          Graphs appended after the newly generated graphs when the kernel
          matrix of new candidates is computed; ``alpha[i]`` is applied to
          the i-th of them.
      alpha : sequence of numbers
          Weights of the linear combination defining the target point.
      idx_gi : sequence of int
          Indices in ``Kmatrix`` of the graphs weighted by ``alpha``.
      Kmatrix : 2-D array
          Kernel matrix indexed as ``Kmatrix[i, j]``.
      k : int
          Number of nearest neighbors (set D_k) that is maintained.
      r_max : int
          The loop stops once ``r`` reaches this value; ``r`` grows by one
          for every iteration in which D_k was not updated and is reset to 0
          whenever a strictly better graph is found.
      gkernel : str
          Kernel name forwarded to ``compute_kernel``.
      epsilon : float
          Tolerance under which two distances are considered equal.
      InitIAMWithAllDk : bool
          If True, IAM is run once per graph of D_k, each run initialised
          with that single graph; otherwise IAM is run once, initialised
          with all graphs of D_k together.
      params_iam : dict or None
          Keyword arguments forwarded to ``iam_upgraded``. ``None`` selects
          ``{'c_ei': 1, 'c_er': 1, 'c_es': 1, 'ite_max': 50,
          'epsilon': 0.001, 'removeNodes': True, 'connected': False}``.
      params_ged : dict or None
          Passed to ``iam_upgraded`` as ``params_ged``. ``None`` selects
          ``{'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
          'saveGXL': 'benoit'}``.

      Returns
      -------
      tuple
          ``(dhat, ghat_list, last_distance, nb_updated, nb_updated_k)``.
          When a graph of ``Gn_init`` is at distance 0 the function returns
          ``(0, ghat_list, 0, 0, 0)`` immediately.

      notes
      -----
      Every time a set of n better graphs is acquired, their distances in
      kernel space are compared with the k nearest ones, and the k nearest
      distances from the k+n distances will be used as the new ones.

      The function prints progress information and shows matplotlib figures
      as a side effect.
      """
      # Defaults are created per call so that no dict is shared between calls.
      if params_iam is None:
          params_iam = {'c_ei': 1, 'c_er': 1, 'c_es': 1,
                        'ite_max': 50, 'epsilon': 0.001,
                        'removeNodes': True, 'connected': False}
      if params_ged is None:
          params_ged = {'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
                        'saveGXL': 'benoit'}

      # compute k nearest neighbors of phi in DN.
      # term3 does not depend on the candidate graph, so it is computed once.
      term3 = 0
      for i1, a1 in enumerate(alpha):
          for i2, a2 in enumerate(alpha):
              term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
      dis_all = []  # distance between g_star and each graph.
      for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
          dis_all.append(dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3))

      # sort
      sort_idx = np.argsort(dis_all)
      dis_k = [dis_all[idis] for idis in sort_idx[0:k]]  # the k shortest distances
      # Number of graphs tied for the smallest distance. The explicit array
      # conversion keeps the comparison elementwise for any scalar type.
      nb_best = int(np.count_nonzero(np.asarray(dis_k) == dis_k[0]))
      # the nearest neighbors of phi in DN
      ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]]
      if dis_k[0] == 0:  # the exact pre-image.
          print('The exact pre-image is found from the input dataset.')
          # Same arity as the regular return so callers can always unpack
          # five values.
          return 0, ghat_list, 0, 0, 0
      dhat = dis_k[0]  # the nearest distance
      Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]]  # the k nearest neighbors
      for gi in Gk:
          nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
          plt.show()
          print(gi.nodes(data=True))
          print(gi.edges(data=True))

      r = 0
      itr_total = 0
      dis_of_each_itr = [dhat]
      found = False
      nb_updated = 0
      nb_updated_k = 0
      # @todo: also stop when the distance no longer changes by more than epsilon?
      while r < r_max:
          print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
          print('Current preimage iteration =', r)
          print('Total preimage iteration =', itr_total, '\n')
          found = False

          Gn_nearest_median = [g.copy() for g in Gk]
          if InitIAMWithAllDk:  # each graph in D_k is used to initialize IAM.
              ghat_new_list = []
              for g_tmp in Gk:
                  Gn_nearest_init = [g_tmp.copy()]
                  ghat_new_list_tmp, _ = iam_upgraded(
                      Gn_nearest_median, Gn_nearest_init,
                      params_ged=params_ged, **params_iam)
                  ghat_new_list += ghat_new_list_tmp
          else:  # all graphs in D_k are passed together as the initial set.
              Gn_nearest_init = [g.copy() for g in Gk]
              ghat_new_list, _ = iam_upgraded(
                  Gn_nearest_median, Gn_nearest_init,
                  params_ged=params_ged, **params_iam)

          # compute distance between psi and the new generated graphs.
          knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
          # In knew the median graphs sit right after the generated graphs.
          idx_median = range(len(ghat_new_list),
                             len(ghat_new_list) + len(Gn_median))
          # @todo: the term3 below could use the one at the beginning of the function.
          dhat_new_list = [
              dis_gstar(idx, idx_median, alpha, knew, withterm3=False)
              for idx in range(len(ghat_new_list))]

          # NOTE(review): the nesting below was reconstructed from a source
          # whose indentation was lost; confirm against the upstream file.
          for idx_g, ghat_new in enumerate(ghat_new_list):
              dhat_new = dhat_new_list[idx_g]

              # if the new distance is smaller than the max of D_k.
              if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
                  # check if the new distance is the same as one in D_k.
                  is_duplicate = False
                  for dis_tmp in dis_k[1:-1]:
                      if np.abs(dhat_new - dis_tmp) < epsilon:
                          is_duplicate = True
                          print('IAM: duplicate k nearest graph generated.')
                          break
                  if not is_duplicate:
                      if np.abs(dhat_new - dhat) < epsilon:
                          print('IAM: I am equal!')
                      else:
                          print('IAM: we got better k nearest neighbors!')
                          nb_updated_k += 1
                          print('the k nearest neighbors are updated',
                                nb_updated_k, 'times.')

                          # Insert the new graph, drop the previous k-th one
                          # and re-sort D_k by distance.
                          dis_k = [dhat_new] + dis_k[0:k-1]
                          Gk = [ghat_new.copy()] + Gk[0:k-1]
                          sort_idx = np.argsort(dis_k)
                          dis_k = [dis_k[idx] for idx in sort_idx[0:k]]
                          Gk = [Gk[idx] for idx in sort_idx[0:k]]
                          if dhat_new < dhat:
                              print('IAM: I have smaller distance!')
                              print(str(dhat) + '->' + str(dhat_new))
                              dhat = dhat_new
                              ghat_list = [Gk[0].copy()]
                              r = 0  # restart the patience counter.
                              nb_updated += 1

                              print('the graph is updated', nb_updated, 'times.')
                              nx.draw(Gk[0],
                                      labels=nx.get_node_attributes(Gk[0], 'atom'),
                                      with_labels=True)
                              plt.show()

                          found = True
          if not found:
              r += 1

          dis_of_each_itr.append(dhat)
          itr_total += 1
          print('\nthe k shortest distances are', dis_k)
          print('the shortest distances for previous iterations are', dis_of_each_itr)

      print('\nthe graph is updated', nb_updated, 'times.')
      print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
      print('distances in kernel space:', dis_of_each_itr, '\n')

      return dhat, ghat_list, dis_of_each_itr[-1], nb_updated, nb_updated_k
  def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
                              l_max, gkernel, epsilon=0.001,
                              InitIAMWithAllDk=False, InitRandomWithAllDk=True,
                              params_iam=None, params_ged=None):
      """Construct a graph pre-image by mixing IAM and random generation.

      This follows the iterative pre-image framework in reference [1],
      algorithm 1, where new graphs are generated by the IAM algorithm in
      reference [2] and, when IAM does not improve the k nearest neighbors,
      by randomly adding/removing edges.

      Parameters
      ----------
      Gn_init : list of graphs
          Candidate graphs; they are relabelled with
          ``nx.convert_node_labels_to_integers`` and graph ``i`` is looked up
          at row/column ``i`` of ``Kmatrix``.
      Gn_median : list of graphs
          Graphs appended after the newly generated graphs when the kernel
          matrix of new candidates is computed; ``alpha[i]`` is applied to
          the i-th of them.
      alpha : sequence of numbers
          Weights of the linear combination defining the target point.
      idx_gi : sequence of int
          Indices in ``Kmatrix`` of the graphs weighted by ``alpha``.
      Kmatrix : 2-D array
          Kernel matrix indexed as ``Kmatrix[i, j]``.
      k : int
          Number of nearest neighbors (set D_k) that is maintained.
      r_max : int
          The outer loop stops once ``r`` reaches this value.
      l_max : int
          Maximum number of random-generation rounds per outer iteration.
      gkernel : str
          Kernel name forwarded to ``compute_kernel``.
      epsilon : float
          Tolerance under which two distances are considered equal.
      InitIAMWithAllDk : bool
          If True, IAM is run once per graph of D_k, each run initialised
          with that single graph; otherwise IAM is run once, initialised
          with all graphs of D_k together.
      InitRandomWithAllDk : bool
          If True, every graph of D_k is perturbed in the random stage;
          otherwise only the nearest one is.
      params_iam : dict or None
          Keyword arguments forwarded to ``iam_upgraded``. ``None`` selects
          ``{'c_ei': 1, 'c_er': 1, 'c_es': 1, 'ite_max': 50,
          'epsilon': 0.001, 'removeNodes': True, 'connected': False}``.
      params_ged : dict or None
          Passed to ``iam_upgraded`` as ``params_ged``. ``None`` selects
          ``{'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
          'saveGXL': 'benoit'}``.

      Returns
      -------
      tuple
          ``(dhat, ghat_list, last_distance, nb_updated_iam,
          nb_updated_random, nb_updated_k_iam, nb_updated_k_random)``.
          When a graph of ``Gn_init`` is at distance 0 the function returns
          ``(0, ghat_list, 0, 0, 0, 0, 0)`` immediately.

      notes
      -----
      Every time a set of n better graphs is acquired, their distances in
      kernel space are compared with the k nearest ones, and the k nearest
      distances from the k+n distances will be used as the new ones.

      The function prints progress information and shows matplotlib figures
      as a side effect.
      """
      # Defaults are created per call so that no dict is shared between calls.
      if params_iam is None:
          params_iam = {'c_ei': 1, 'c_er': 1, 'c_es': 1,
                        'ite_max': 50, 'epsilon': 0.001,
                        'removeNodes': True, 'connected': False}
      if params_ged is None:
          params_ged = {'ged_cost': 'CHEM_1', 'ged_method': 'IPFP',
                        'saveGXL': 'benoit'}

      Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
      # compute k nearest neighbors of phi in DN.
      # term3 does not depend on the candidate graph, so it is computed once.
      term3 = 0
      for i1, a1 in enumerate(alpha):
          for i2, a2 in enumerate(alpha):
              term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
      dis_all = []  # distance between g_star and each graph.
      for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
          dis_all.append(dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3))

      # sort
      sort_idx = np.argsort(dis_all)
      dis_k = [dis_all[idis] for idis in sort_idx[0:k]]  # the k shortest distances
      # Number of graphs tied for the smallest distance. The explicit array
      # conversion keeps the comparison elementwise for any scalar type.
      nb_best = int(np.count_nonzero(np.asarray(dis_k) == dis_k[0]))
      # the nearest neighbors of psi in DN
      ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]]
      if dis_k[0] == 0:  # the exact pre-image.
          print('The exact pre-image is found from the input dataset.')
          # Same arity as the regular return so callers can always unpack
          # seven values.
          return 0, ghat_list, 0, 0, 0, 0, 0
      dhat = dis_k[0]  # the nearest distance
      Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]]  # the k nearest neighbors
      for gi in Gk:
          nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
          plt.show()
          print(gi.nodes(data=True))
          print(gi.edges(data=True))

      r = 0
      itr_total = 0
      dis_of_each_itr = [dhat]
      nb_updated_iam = 0
      nb_updated_k_iam = 0
      nb_updated_random = 0
      nb_updated_k_random = 0
      # Keeps the diagnostic print in the random stage defined even when IAM
      # returns no graph at all.
      dhat_new = dhat
      # In a kernel matrix of [new graph] + Gn_median the medians start at 1.
      idx_median_single = range(1, len(Gn_median) + 1)
      # @todo: also stop when the distance no longer changes by more than epsilon?
      while r < r_max:
          print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
          print('Current preimage iteration =', r)
          print('Total preimage iteration =', itr_total, '\n')
          found_iam = False

          Gn_nearest_median = [g.copy() for g in Gk]
          if InitIAMWithAllDk:  # each graph in D_k is used to initialize IAM.
              ghat_new_list = []
              for g_tmp in Gk:
                  Gn_nearest_init = [g_tmp.copy()]
                  ghat_new_list_tmp, _ = iam_upgraded(
                      Gn_nearest_median, Gn_nearest_init,
                      params_ged=params_ged, **params_iam)
                  ghat_new_list += ghat_new_list_tmp
          else:  # all graphs in D_k are passed together as the initial set.
              Gn_nearest_init = [g.copy() for g in Gk]
              ghat_new_list, _ = iam_upgraded(
                  Gn_nearest_median, Gn_nearest_init,
                  params_ged=params_ged, **params_iam)

          # compute distance between psi and the new generated graphs.
          knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
          # In knew the median graphs sit right after the generated graphs.
          idx_median = range(len(ghat_new_list),
                             len(ghat_new_list) + len(Gn_median))
          # @todo: the term3 below could use the one at the beginning of the function.
          dhat_new_list = [
              dis_gstar(idx, idx_median, alpha, knew, withterm3=False)
              for idx in range(len(ghat_new_list))]

          # find the new k nearest graphs.
          # NOTE(review): the nesting below was reconstructed from a source
          # whose indentation was lost; confirm against the upstream file.
          for idx_g, ghat_new in enumerate(ghat_new_list):
              dhat_new = dhat_new_list[idx_g]

              # if the new distance is smaller than the max of D_k.
              if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
                  # check if the new distance is the same as one in D_k.
                  is_duplicate = False
                  for dis_tmp in dis_k[1:-1]:
                      if np.abs(dhat_new - dis_tmp) < epsilon:
                          is_duplicate = True
                          print('IAM: duplicate k nearest graph generated.')
                          break
                  if not is_duplicate:
                      if np.abs(dhat_new - dhat) < epsilon:
                          print('IAM: I am equal!')
                      else:
                          print('IAM: we got better k nearest neighbors!')
                          nb_updated_k_iam += 1
                          print('the k nearest neighbors are updated',
                                nb_updated_k_iam, 'times.')

                          # Insert the new graph, drop the previous k-th one
                          # and re-sort D_k by distance.
                          dis_k = [dhat_new] + dis_k[0:k-1]
                          Gk = [ghat_new.copy()] + Gk[0:k-1]
                          sort_idx = np.argsort(dis_k)
                          dis_k = [dis_k[idx] for idx in sort_idx[0:k]]
                          Gk = [Gk[idx] for idx in sort_idx[0:k]]
                          if dhat_new < dhat:
                              print('IAM: I have smaller distance!')
                              print(str(dhat) + '->' + str(dhat_new))
                              dhat = dhat_new
                              ghat_list = [Gk[0].copy()]
                              r = 0  # restart the patience counter.
                              nb_updated_iam += 1

                              print('the graph is updated by IAM', nb_updated_iam,
                                    'times.')
                              nx.draw(Gk[0],
                                      labels=nx.get_node_attributes(Gk[0], 'atom'),
                                      with_labels=True)
                              plt.show()

                          found_iam = True

          # when new distance is not smaller than the max of D_k, use random generation.
          if not found_iam:
              print('Distance not better, switching to random generation now.')
              print(str(dhat) + '->' + str(dhat_new))

              if InitRandomWithAllDk:  # use all k nearest graphs as the initials.
                  init_list = [g_init.copy() for g_init in Gk]
              else:  # use just the nearest graph as the initial.
                  init_list = [Gk[0].copy()]

              # number of edges to be changed.
              if len(init_list) == 1:
                  # @todo what if the log is negative? how to choose alpha
                  # (scalar)? seems fdgs is always 1.
                  fdgs = nb_updated_random + 1
                  if fdgs < 1:
                      fdgs = 1
                  fdgs = int(np.ceil(np.log(fdgs)))
                  if fdgs < 1:
                      fdgs += 1
                  fdgs_list = [fdgs]
              else:
                  # @todo what if the log is negative? how to choose alpha (scalar)?
                  fdgs_list = np.array(dis_k[:])
                  if np.min(fdgs_list) < 1:
                      # Rescale so the smallest distance maps to log(1) = 0.
                      fdgs_list /= dis_k[0]
                  fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
                  if np.min(fdgs_list) < 1:
                      fdgs_list = np.array(fdgs_list) + 1

              l = 0
              found_random = False
              while l < l_max and not found_random:
                  for idx_g, g_tmp in enumerate(init_list):
                      # add and delete edges.
                      ghat_new = nx.convert_node_labels_to_integers(g_tmp.copy())
                      nb_nodes = nx.number_of_nodes(ghat_new)
                      # @todo: should we use just half of the adjacency
                      # matrix for undirected graphs?
                      nb_vpairs = nb_nodes * (nb_nodes - 1)
                      # NOTE(review): this reseeds NumPy's global RNG, but the
                      # sampling below uses the stdlib ``random`` module.
                      np.random.seed()
                      # which edges to change; never more than there are
                      # ordered node pairs.
                      nb_changes = min(int(fdgs_list[idx_g]), nb_vpairs)
                      idx_change = random.sample(range(nb_vpairs), nb_changes)
                      for item in idx_change:
                          # Decode the flat index into an ordered pair of
                          # distinct nodes.
                          node1, node2 = divmod(item, nb_nodes - 1)
                          if node2 >= node1:  # skip the self pair.
                              node2 += 1
                          # @todo: is the randomness correct?
                          if not ghat_new.has_edge(node1, node2):
                              ghat_new.add_edge(node1, node2)
                          else:
                              ghat_new.remove_edge(node1, node2)

                      # compute distance between psi and the new generated graph.
                      knew = compute_kernel([ghat_new] + Gn_median, gkernel,
                                            verbose=False)
                      dhat_new = dis_gstar(0, idx_median_single, alpha, knew,
                                           withterm3=False)
                      # @todo: the new distance is smaller or also equal?
                      if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
                          # check if the new distance is the same as one in D_k.
                          is_duplicate = False
                          for dis_tmp in dis_k[1:-1]:
                              if np.abs(dhat_new - dis_tmp) < epsilon:
                                  is_duplicate = True
                                  print('Random: duplicate k nearest graph generated.')
                                  break
                          if not is_duplicate:
                              if np.abs(dhat_new - dhat) < epsilon:
                                  print('Random: I am equal!')
                              else:
                                  print('Random: we got better k nearest neighbors!')
                                  print('l =', str(l))
                                  nb_updated_k_random += 1
                                  print('the k nearest neighbors are updated by random generation',
                                        nb_updated_k_random, 'times.')

                                  # Add the new graph and keep the k nearest.
                                  dis_k = [dhat_new] + dis_k
                                  Gk = [ghat_new.copy()] + Gk
                                  sort_idx = np.argsort(dis_k)
                                  dis_k = [dis_k[idx] for idx in sort_idx[0:k]]
                                  Gk = [Gk[idx] for idx in sort_idx[0:k]]
                                  if dhat_new < dhat:
                                      print('\nRandom: I am smaller!')
                                      print('l =', str(l))
                                      print(dhat, '->', dhat_new)
                                      dhat = dhat_new
                                      ghat_list = [ghat_new.copy()]
                                      r = 0  # restart the patience counter.
                                      nb_updated_random += 1

                                      print('the graph is updated by random generation',
                                            nb_updated_random, 'times.')
                                      nx.draw(ghat_new,
                                              labels=nx.get_node_attributes(ghat_new, 'atom'),
                                              with_labels=True)
                                      plt.show()
                                  found_random = True
                                  break
                  l += 1
              if not found_random:  # l == l_max:
                  r += 1

          dis_of_each_itr.append(dhat)
          itr_total += 1
          print('\nthe k shortest distances are', dis_k)
          print('the shortest distances for previous iterations are', dis_of_each_itr)

      print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
            nb_updated_random, 'times.')
      print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam,
            'times, and by random generation', nb_updated_k_random, 'times.')
      print('distances in kernel space:', dis_of_each_itr, '\n')

      return dhat, ghat_list, dis_of_each_itr[-1], \
          nb_updated_iam, nb_updated_random, nb_updated_k_iam, nb_updated_k_random
  414. ###############################################################################
  415. # useful functions.
  def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
      """Return the kernel-space distance between a graph and a weighted mix.

      The value computed is
      ``sqrt(K[g, g] - 2 * sum_i alpha_i K[g, gi_i]
      + sum_ij alpha_i alpha_j K[gi_i, gi_j])``.

      Parameters
      ----------
      idx_g : int
          Index in ``Kmatrix`` of the graph whose distance is wanted.
      idx_gi : indexable of int
          Indices in ``Kmatrix`` of the graphs weighted by ``alpha``; only
          the first ``len(alpha)`` entries are read.
      alpha : sequence of numbers
          Weights of the linear combination.
      Kmatrix : 2-D array
          Kernel matrix indexed as ``Kmatrix[i, j]``.
      term3 : number
          Value of the double sum when it is supplied by the caller. When
          ``withterm3`` is False the double sum is added on top of it.
      withterm3 : bool
          True means ``term3`` is already provided and is used as is; False
          means the double sum is computed here.

      Returns
      -------
      float
          The distance. A squared distance that comes out negative (for
          example through floating-point round-off on an exact pre-image)
          is clamped to 0 instead of producing ``nan``.
      """
      term1 = Kmatrix[idx_g, idx_g]
      term2 = 2 * sum(a * Kmatrix[idx_g, idx_gi[i]] for i, a in enumerate(alpha))
      if not withterm3:
          term3 = sum((a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
                       for i1, a1 in enumerate(alpha)
                       for i2, a2 in enumerate(alpha)), term3)
      squared = term1 - term2 + term3
      # Round-off can leave a value such as -1e-16 where the exact result is
      # 0; sqrt of that would be nan, which breaks the callers' ``== 0``
      # test and their sorting. nan inputs still propagate through max().
      return np.sqrt(max(squared, 0.0))
  def compute_kernel(Gn, graph_kernel, verbose):
      """Compute the normalized kernel matrix of a list of graphs.

      Parameters
      ----------
      Gn : list of graphs
          Graphs handed to the selected kernel function.
      graph_kernel : str
          One of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
          ``'spkernel'`` or ``'structuralspkernel'``.
      verbose : bool
          Forwarded to the kernel function.

      Returns
      -------
      2-D array
          The kernel matrix with entry ``(i, j)`` divided by
          ``sqrt(K[i, i] * K[j, j])``. The matrix returned by the kernel
          function is modified in place.

      Raises
      ------
      ValueError
          If ``graph_kernel`` is not one of the supported names.
      """
      n_jobs = multiprocessing.cpu_count()
      if graph_kernel == 'marginalizedkernel':
          Kmatrix, _ = marginalizedkernel(
              Gn, node_label='atom', edge_label=None,
              p_quit=0.03, n_iteration=10, remove_totters=False,
              n_jobs=n_jobs, verbose=verbose)
      elif graph_kernel == 'untilhpathkernel':
          Kmatrix, _ = untilhpathkernel(
              Gn, node_label='atom', edge_label=None,
              depth=10, k_func='MinMax', compute_method='trie',
              n_jobs=n_jobs, verbose=verbose)
      elif graph_kernel == 'spkernel':
          mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
          # spkernel returns three values; only the matrix is used here.
          Kmatrix, _, _ = spkernel(
              Gn, node_label='atom',
              node_kernels={'symb': deltakernel, 'nsymb': gaussiankernel,
                            'mix': mixkernel},
              n_jobs=n_jobs, verbose=verbose)
      elif graph_kernel == 'structuralspkernel':
          mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
          Kmatrix, _ = structuralspkernel(
              Gn, node_label='atom',
              node_kernels={'symb': deltakernel, 'nsymb': gaussiankernel,
                            'mix': mixkernel},
              n_jobs=n_jobs, verbose=verbose)
      else:
          # Previously an unknown name fell through and failed later with an
          # UnboundLocalError on Kmatrix.
          raise ValueError('unknown graph kernel: ' + repr(graph_kernel))

      # normalization
      # The diagonal is copied first because it is overwritten in the loop.
      Kmatrix_diag = Kmatrix.diagonal().copy()
      for i in range(len(Kmatrix)):
          for j in range(i, len(Kmatrix)):
              Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
              Kmatrix[j][i] = Kmatrix[i][j]  # mirror to keep the matrix symmetric.
      return Kmatrix
  def gram2distances(Kmatrix):
      """Convert a Gram (kernel) matrix into a matrix of pairwise distances.

      Entry ``(i, j)`` of the result is
      ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``.
      """
      size = len(Kmatrix)
      squared = np.zeros((size, size))
      for row in range(size):
          self_row = Kmatrix[row, row]
          squared[row, :] = [self_row + Kmatrix[col, col] - 2 * Kmatrix[row, col]
                             for col in range(size)]
      return np.sqrt(squared)
  460. ###############################################################################
  461. # Old implementations.
  462. #def gk_iam(Gn, alpha):
  463. # """This function constructs graph pre-image by the iterative pre-image
  464. # framework in reference [1], algorithm 1, where the step of generating new
  465. # graphs randomly is replaced by the IAM algorithm in reference [2].
  466. #
  467. # notes
  468. # -----
  469. # Every time a better graph is acquired, the older one is replaced by it.
  470. # """
  471. # pass
  472. # # compute k nearest neighbors of phi in DN.
  473. # dis_list = [] # distance between g_star and each graph.
  474. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  475. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  476. # k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
  477. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  478. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  479. # dis_list.append(dtemp)
  480. #
  481. # # sort
  482. # sort_idx = np.argsort(dis_list)
  483. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
  484. # g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  485. # if dis_gs[0] == 0: # the exact pre-image.
  486. # print('The exact pre-image is found from the input dataset.')
  487. # return 0, g0hat
  488. # dhat = dis_gs[0] # the nearest distance
  489. # Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
  490. # gihat_list = []
  491. #
  492. ## i = 1
  493. # r = 1
  494. # while r < r_max:
  495. # print('r =', r)
  496. ## found = False
  497. # Gs_nearest = Gk + gihat_list
  498. # g_tmp = iam(Gs_nearest)
  499. #
  500. # # compute distance between \psi and the new generated graph.
  501. # knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
  502. # p_quit=lmbda, n_iteration=20, remove_totters=False,
  503. # n_jobs=multiprocessing.cpu_count(), verbose=False)
  504. # dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
  505. # knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
  506. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  507. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  508. # if dnew <= dhat: # the new distance is smaller
  509. # print('I am smaller!')
  510. # dhat = dnew
  511. # g_new = g_tmp.copy() # found better graph.
  512. # gihat_list = [g_new]
  513. # dis_gs.append(dhat)
  514. # r = 0
  515. # else:
  516. # r += 1
  517. #
  518. # ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
  519. #
  520. # return dhat, ghat
  521. #def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  522. # """This function constructs graph pre-image by the iterative pre-image
  523. # framework in reference [1], algorithm 1, where the step of generating new
  524. # graphs randomly is replaced by the IAM algorithm in reference [2].
  525. #
  526. # notes
  527. # -----
  528. # Every time a better graph is acquired, its distance in kernel space is
  529. # compared with the k nearest ones, and the k nearest distances from the k+1
  530. # distances will be used as the new ones.
  531. # """
  532. # # compute k nearest neighbors of phi in DN.
  533. # dis_list = [] # distance between g_star and each graph.
  534. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  535. # dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  536. ## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  537. ## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  538. ## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  539. ## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  540. # dis_list.append(dtemp)
  541. #
  542. # # sort
  543. # sort_idx = np.argsort(dis_list)
  544. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  545. # g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  546. # if dis_gs[0] == 0: # the exact pre-image.
  547. # print('The exact pre-image is found from the input dataset.')
  548. # return 0, g0hat
  549. # dhat = dis_gs[0] # the nearest distance
  550. # ghat = g0hat.copy()
  551. # Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  552. # for gi in Gk:
  553. # nx.draw_networkx(gi)
  554. # plt.show()
  555. # print(gi.nodes(data=True))
  556. # print(gi.edges(data=True))
  557. # Gs_nearest = Gk.copy()
  558. ## gihat_list = []
  559. #
  560. ## i = 1
  561. # r = 1
  562. # while r < r_max:
  563. # print('r =', r)
  564. ## found = False
  565. ## Gs_nearest = Gk + gihat_list
  566. ## g_tmp = iam(Gs_nearest)
  567. # g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  568. # nx.draw_networkx(g_tmp)
  569. # plt.show()
  570. # print(g_tmp.nodes(data=True))
  571. # print(g_tmp.edges(data=True))
  572. #
  573. # # compute distance between \psi and the new generated graph.
  574. # gi_list = [Gn[i] for i in idx_gi]
  575. # knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
  576. # dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
  577. #
  578. ## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  579. ## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  580. ## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  581. ## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  582. # if dnew <= dhat and g_tmp != ghat: # the new distance is smaller
  583. # print('I am smaller!')
  584. # print(str(dhat) + '->' + str(dnew))
  585. ## nx.draw_networkx(ghat)
  586. ## plt.show()
  587. ## print('->')
  588. ## nx.draw_networkx(g_tmp)
  589. ## plt.show()
  590. #
  591. # dhat = dnew
  592. # g_new = g_tmp.copy() # found better graph.
  593. # ghat = g_tmp.copy()
  594. # dis_gs.append(dhat) # add the new nearest distance.
  595. # Gs_nearest.append(g_new) # add the corresponding graph.
  596. # sort_idx = np.argsort(dis_gs)
  597. # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  598. # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  599. # r = 0
  600. # else:
  601. # r += 1
  602. #
  603. # return dhat, ghat
  604. #def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  605. # """This function constructs graph pre-image by the iterative pre-image
  606. # framework in reference [1], algorithm 1, where the step of generating new
  607. # graphs randomly is replaced by the IAM algorithm in reference [2].
  608. #
  609. # notes
  610. # -----
  611. # Every time a set of n better graphs is acquired, their distances in kernel space are
  612. # compared with the k nearest ones, and the k nearest distances from the k+n
  613. # distances will be used as the new ones.
  614. # """
  615. # Gn_median = [Gn[idx].copy() for idx in idx_gi]
  616. # # compute k nearest neighbors of phi in DN.
  617. # dis_list = [] # distance between g_star and each graph.
  618. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  619. # dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  620. ## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  621. ## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  622. ## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  623. ## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  624. # dis_list.append(dtemp)
  625. #
  626. # # sort
  627. # sort_idx = np.argsort(dis_list)
  628. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  629. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  630. # g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  631. # if dis_gs[0] == 0: # the exact pre-image.
  632. # print('The exact pre-image is found from the input dataset.')
  633. # return 0, g0hat_list
  634. # dhat = dis_gs[0] # the nearest distance
  635. # ghat_list = [g.copy() for g in g0hat_list]
  636. # for g in ghat_list:
  637. # nx.draw_networkx(g)
  638. # plt.show()
  639. # print(g.nodes(data=True))
  640. # print(g.edges(data=True))
  641. # Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  642. # for gi in Gk:
  643. # nx.draw_networkx(gi)
  644. # plt.show()
  645. # print(gi.nodes(data=True))
  646. # print(gi.edges(data=True))
  647. # Gs_nearest = Gk.copy()
  648. ## gihat_list = []
  649. #
  650. ## i = 1
  651. # r = 1
  652. # while r < r_max:
  653. # print('r =', r)
  654. ## found = False
  655. ## Gs_nearest = Gk + gihat_list
  656. ## g_tmp = iam(Gs_nearest)
  657. # g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
  658. # Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  659. # for g in g_tmp_list:
  660. # nx.draw_networkx(g)
  661. # plt.show()
  662. # print(g.nodes(data=True))
  663. # print(g.edges(data=True))
  664. #
  665. # # compute distance between \psi and the new generated graphs.
  666. # gi_list = [Gn[i] for i in idx_gi]
  667. # knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
  668. # dnew_list = []
  669. # for idx, g_tmp in enumerate(g_tmp_list):
  670. # dnew_list.append(dis_gstar(idx, range(len(g_tmp_list),
  671. # len(g_tmp_list) + len(gi_list) + 1), alpha, knew))
  672. #
  673. ## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  674. ## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  675. ## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  676. ## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  677. #
  678. # # find the new k nearest graphs.
  679. # dis_gs = dnew_list + dis_gs # add the new nearest distances.
  680. # Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
  681. # sort_idx = np.argsort(dis_gs)
  682. # if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
  683. # print('We got better k nearest neighbors! Hurray!')
  684. # dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  685. # print(dis_gs[-1])
  686. # Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  687. # nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  688. # if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
  689. # print('I have smaller or equal distance!')
  690. # dhat = dis_gs[0]
  691. # print(str(dhat) + '->' + str(dhat))
  692. # idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
  693. # ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
  694. # for g in ghat_list:
  695. # nx.draw_networkx(g)
  696. # plt.show()
  697. # print(g.nodes(data=True))
  698. # print(g.edges(data=True))
  699. # r = 0
  700. # else:
  701. # r += 1
  702. #
  703. # return dhat, ghat_list

A Python package for graph kernels, graph edit distances and the graph pre-image problem.