You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_preimage_random.py 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Sep 5 15:59:00 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import networkx as nx
  9. import matplotlib.pyplot as plt
  10. import time
  11. import random
  12. #from tqdm import tqdm
  13. from gklearn.utils.graphfiles import loadDataset
  14. from gklearn.preimage.preimage_random import preimage_random
  15. from gklearn.preimage.ged import ged_median
  16. from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges
  17. ###############################################################################
  18. # tests on different values on grid of median-sets and k.
  19. def test_preimage_random_grid_k_median_nb():
  20. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  21. 'extra_params': {}} # node/edge symb
  22. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  23. # Gn = Gn[0:50]
  24. remove_edges(Gn)
  25. gkernel = 'marginalizedkernel'
  26. lmbda = 0.03 # termination probalility
  27. r_max = 5 # iteration limit for pre-image.
  28. l = 500 # update limit for random generation
  29. # alpha_range = np.linspace(0.5, 0.5, 1)
  30. # k = 5 # k nearest neighbors
  31. # parameters for GED function
  32. ged_cost='CHEM_1'
  33. ged_method='IPFP'
  34. saveGXL='gedlib'
  35. # number of graphs; we what to compute the median of these graphs.
  36. nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
  37. # number of nearest neighbors.
  38. k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
  39. # find out all the graphs classified to positive group 1.
  40. idx_dict = get_same_item_indices(y_all)
  41. Gn = [Gn[i] for i in idx_dict[1]]
  42. # # compute Gram matrix.
  43. # time0 = time.time()
  44. # km = compute_kernel(Gn, gkernel, True)
  45. # time_km = time.time() - time0
  46. # # write Gram matrix to file.
  47. # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
  48. time_list = []
  49. dis_ks_min_list = []
  50. sod_gs_list = []
  51. sod_gs_min_list = []
  52. nb_updated_list = []
  53. g_best = []
  54. for idx_nb, nb_median in enumerate(nb_median_range):
  55. print('\n-------------------------------------------------------')
  56. print('number of median graphs =', nb_median)
  57. random.seed(1)
  58. idx_rdm = random.sample(range(len(Gn)), nb_median)
  59. print('graphs chosen:', idx_rdm)
  60. Gn_median = [Gn[idx].copy() for idx in idx_rdm]
  61. # for g in Gn_median:
  62. # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
  63. ## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
  64. # plt.show()
  65. # plt.clf()
  66. ###################################################################
  67. gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
  68. km_tmp = gmfile['gm']
  69. time_km = gmfile['gmtime']
  70. # modify mixed gram matrix.
  71. km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
  72. for i in range(len(Gn)):
  73. for j in range(i, len(Gn)):
  74. km[i, j] = km_tmp[i, j]
  75. km[j, i] = km[i, j]
  76. for i in range(len(Gn)):
  77. for j, idx in enumerate(idx_rdm):
  78. km[i, len(Gn) + j] = km[i, idx]
  79. km[len(Gn) + j, i] = km[i, idx]
  80. for i, idx1 in enumerate(idx_rdm):
  81. for j, idx2 in enumerate(idx_rdm):
  82. km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
  83. ###################################################################
  84. alpha_range = [1 / nb_median] * nb_median
  85. time_list.append([])
  86. dis_ks_min_list.append([])
  87. sod_gs_list.append([])
  88. sod_gs_min_list.append([])
  89. nb_updated_list.append([])
  90. g_best.append([])
  91. for k in k_range:
  92. print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
  93. print('k =', k)
  94. time0 = time.time()
  95. dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range,
  96. range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel)
  97. time_total = time.time() - time0 + time_km
  98. print('time: ', time_total)
  99. time_list[idx_nb].append(time_total)
  100. print('\nsmallest distance in kernel space: ', dhat)
  101. dis_ks_min_list[idx_nb].append(dhat)
  102. g_best[idx_nb].append(ghat)
  103. print('\nnumber of updates of the best graph: ', nb_updated)
  104. nb_updated_list[idx_nb].append(nb_updated)
  105. # show the best graph and save it to file.
  106. print('the shortest distance is', dhat)
  107. print('one of the possible corresponding pre-images is')
  108. nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
  109. with_labels=True)
  110. plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
  111. '_k' + str(k) + '.png', format="PNG")
  112. # plt.show()
  113. plt.clf()
  114. # print(ghat_list[0].nodes(data=True))
  115. # print(ghat_list[0].edges(data=True))
  116. # compute the corresponding sod in graph space.
  117. sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
  118. ged_method=ged_method, saveGXL=saveGXL)
  119. sod_gs_list[idx_nb].append(sod_tmp)
  120. sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
  121. print('\nsmallest sod in graph space: ', np.min(sod_tmp))
  122. print('\nsods in graph space: ', sod_gs_list)
  123. print('\nsmallest sod in graph space for each set of median graphs and k: ',
  124. sod_gs_min_list)
  125. print('\nsmallest distance in kernel space for each set of median graphs and k: ',
  126. dis_ks_min_list)
  127. print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
  128. nb_updated_list)
  129. print('\ntimes:', time_list)
  130. ###############################################################################
  131. # tests on different numbers of median-sets.
  132. def test_preimage_random_median_nb():
  133. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  134. 'extra_params': {}} # node/edge symb
  135. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  136. # Gn = Gn[0:50]
  137. remove_edges(Gn)
  138. gkernel = 'marginalizedkernel'
  139. lmbda = 0.03 # termination probalility
  140. r_max = 5 # iteration limit for pre-image.
  141. l = 500 # update limit for random generation
  142. # alpha_range = np.linspace(0.5, 0.5, 1)
  143. k = 5 # k nearest neighbors
  144. # parameters for GED function
  145. ged_cost='CHEM_1'
  146. ged_method='IPFP'
  147. saveGXL='gedlib'
  148. # number of graphs; we what to compute the median of these graphs.
  149. nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
  150. # find out all the graphs classified to positive group 1.
  151. idx_dict = get_same_item_indices(y_all)
  152. Gn = [Gn[i] for i in idx_dict[1]]
  153. # # compute Gram matrix.
  154. # time0 = time.time()
  155. # km = compute_kernel(Gn, gkernel, True)
  156. # time_km = time.time() - time0
  157. # # write Gram matrix to file.
  158. # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
  159. time_list = []
  160. dis_ks_min_list = []
  161. sod_gs_list = []
  162. sod_gs_min_list = []
  163. nb_updated_list = []
  164. g_best = []
  165. for nb_median in nb_median_range:
  166. print('\n-------------------------------------------------------')
  167. print('number of median graphs =', nb_median)
  168. random.seed(1)
  169. idx_rdm = random.sample(range(len(Gn)), nb_median)
  170. print('graphs chosen:', idx_rdm)
  171. Gn_median = [Gn[idx].copy() for idx in idx_rdm]
  172. # for g in Gn_median:
  173. # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
  174. ## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
  175. # plt.show()
  176. # plt.clf()
  177. ###################################################################
  178. gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
  179. km_tmp = gmfile['gm']
  180. time_km = gmfile['gmtime']
  181. # modify mixed gram matrix.
  182. km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
  183. for i in range(len(Gn)):
  184. for j in range(i, len(Gn)):
  185. km[i, j] = km_tmp[i, j]
  186. km[j, i] = km[i, j]
  187. for i in range(len(Gn)):
  188. for j, idx in enumerate(idx_rdm):
  189. km[i, len(Gn) + j] = km[i, idx]
  190. km[len(Gn) + j, i] = km[i, idx]
  191. for i, idx1 in enumerate(idx_rdm):
  192. for j, idx2 in enumerate(idx_rdm):
  193. km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
  194. ###################################################################
  195. alpha_range = [1 / nb_median] * nb_median
  196. time0 = time.time()
  197. dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range,
  198. range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel)
  199. time_total = time.time() - time0 + time_km
  200. print('time: ', time_total)
  201. time_list.append(time_total)
  202. print('\nsmallest distance in kernel space: ', dhat)
  203. dis_ks_min_list.append(dhat)
  204. g_best.append(ghat)
  205. print('\nnumber of updates of the best graph: ', nb_updated)
  206. nb_updated_list.append(nb_updated)
  207. # show the best graph and save it to file.
  208. print('the shortest distance is', dhat)
  209. print('one of the possible corresponding pre-images is')
  210. nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
  211. with_labels=True)
  212. plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
  213. '.png', format="PNG")
  214. # plt.show()
  215. plt.clf()
  216. # print(ghat_list[0].nodes(data=True))
  217. # print(ghat_list[0].edges(data=True))
  218. # compute the corresponding sod in graph space.
  219. sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
  220. ged_method=ged_method, saveGXL=saveGXL)
  221. sod_gs_list.append(sod_tmp)
  222. sod_gs_min_list.append(np.min(sod_tmp))
  223. print('\nsmallest sod in graph space: ', np.min(sod_tmp))
  224. print('\nsods in graph space: ', sod_gs_list)
  225. print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
  226. print('\nsmallest distance in kernel space for each set of median graphs: ',
  227. dis_ks_min_list)
  228. print('\nnumber of updates of the best graph for each set of median graphs: ',
  229. nb_updated_list)
  230. print('\ntimes:', time_list)
  231. ###############################################################################
  232. # test on the combination of the two randomly chosen graphs. (the same as in the
  233. # random pre-image paper.)
  234. def test_random_preimage_2combination():
  235. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  236. 'extra_params': {}} # node/edge symb
  237. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  238. # Gn = Gn[0:12]
  239. remove_edges(Gn)
  240. gkernel = 'marginalizedkernel'
  241. # dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel)
  242. # print(dis_max, dis_min, dis_mean)
  243. lmbda = 0.03 # termination probalility
  244. r_max = 10 # iteration limit for pre-image.
  245. l = 500
  246. alpha_range = np.linspace(0, 1, 11)
  247. k = 5 # k nearest neighbors
  248. # randomly select two molecules
  249. np.random.seed(1)
  250. idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2)
  251. g1 = Gn[idx_gi[0]].copy()
  252. g2 = Gn[idx_gi[1]].copy()
  253. # nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
  254. # plt.savefig("results/random_preimage/mutag10.png", format="PNG")
  255. # plt.show()
  256. # nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
  257. # plt.savefig("results/random_preimage/mutag11.png", format="PNG")
  258. # plt.show()
  259. ######################################################################
  260. # Gn_mix = [g.copy() for g in Gn]
  261. # Gn_mix.append(g1.copy())
  262. # Gn_mix.append(g2.copy())
  263. #
  264. ## g_tmp = iam([g1, g2])
  265. ## nx.draw_networkx(g_tmp)
  266. ## plt.show()
  267. #
  268. # # compute
  269. # time0 = time.time()
  270. # km = compute_kernel(Gn_mix, gkernel, True)
  271. # time_km = time.time() - time0
  272. ###################################################################
  273. idx1 = idx_gi[0]
  274. idx2 = idx_gi[1]
  275. gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
  276. km = gmfile['gm']
  277. time_km = gmfile['gmtime']
  278. # modify mixed gram matrix.
  279. for i in range(len(Gn)):
  280. km[i, len(Gn)] = km[i, idx1]
  281. km[i, len(Gn) + 1] = km[i, idx2]
  282. km[len(Gn), i] = km[i, idx1]
  283. km[len(Gn) + 1, i] = km[i, idx2]
  284. km[len(Gn), len(Gn)] = km[idx1, idx1]
  285. km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
  286. km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
  287. km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
  288. ###################################################################
  289. time_list = []
  290. nb_updated_list = []
  291. g_best = []
  292. dis_ks_min_list = []
  293. # for each alpha
  294. for alpha in alpha_range:
  295. print('\n-------------------------------------------------------\n')
  296. print('alpha =', alpha)
  297. time0 = time.time()
  298. dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha],
  299. range(len(Gn), len(Gn) + 2), km,
  300. k, r_max, l, gkernel)
  301. time_total = time.time() - time0 + time_km
  302. print('time: ', time_total)
  303. time_list.append(time_total)
  304. dis_ks_min_list.append(dhat)
  305. g_best.append(ghat)
  306. nb_updated_list.append(nb_updated)
  307. # show best graphs and save them to file.
  308. for idx, item in enumerate(alpha_range):
  309. print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
  310. print('one of the possible corresponding pre-images is')
  311. nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'),
  312. with_labels=True)
  313. plt.show()
  314. plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
  315. plt.clf()
  316. print(g_best[idx].nodes(data=True))
  317. print(g_best[idx].edges(data=True))
  318. # # compute the corresponding sod in graph space. (alpha range not considered.)
  319. # sod_tmp, _ = median_distance(g_best[0], Gn_let)
  320. # sod_gs_list.append(sod_tmp)
  321. # sod_gs_min_list.append(np.min(sod_tmp))
  322. # sod_ks_min_list.append(sod_ks)
  323. # nb_updated_list.append(nb_updated)
  324. # print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
  325. print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
  326. print('\nnumber of updates for each alpha: ', nb_updated_list)
  327. print('\ntimes:', time_list)
  328. ###############################################################################
  329. if __name__ == '__main__':
  330. ###############################################################################
  331. # test on the combination of the two randomly chosen graphs. (the same as in the
  332. # random pre-image paper.)
  333. # test_random_preimage_2combination()
  334. ###############################################################################
  335. # tests all algorithms on different numbers of median-sets.
  336. test_preimage_random_median_nb()
  337. ###############################################################################
  338. # tests all algorithms on different values on grid of median-sets and k.
  339. # test_preimage_random_grid_k_median_nb()

A Python package for graph kernels, graph edit distances and graph pre-image problem.