You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_iam.py 35 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Sep 5 15:59:00 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import networkx as nx
  9. import matplotlib.pyplot as plt
  10. import time
  11. import random
  12. #from tqdm import tqdm
  13. #import os
  14. import sys
  15. sys.path.insert(0, "../")
  16. from pygraph.utils.graphfiles import loadDataset
  17. #from pygraph.utils.logger2file import *
  18. from iam import iam_upgraded
  19. from utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar
  20. #from ged import ged_median
  21. def test_iam_monoterpenoides():
  22. ds = {'name': 'monoterpenoides',
  23. 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
  24. Gn, y_all = loadDataset(ds['dataset'])
  25. # Gn = Gn[0:50]
  26. gkernel = 'untilhpathkernel'
  27. node_label = 'atom'
  28. edge_label = 'bond_type'
  29. # parameters for GED function from the IAM paper.
  30. # fitted edit costs (Gaussian).
  31. c_vi = 0.03620133402089074
  32. c_vr = 0.0417574590207099
  33. c_vs = 0.009992282328587499
  34. c_ei = 0.08293120042342755
  35. c_er = 0.09512220476358019
  36. c_es = 0.09222529696841467
  37. # # fitted edit costs (linear combinations).
  38. # c_vi = 0.1749684054238749
  39. # c_vr = 0.0734054228711457
  40. # c_vs = 0.05017781726016715
  41. # c_ei = 0.1869431164806936
  42. # c_er = 0.32055856948274
  43. # c_es = 0.2569469379247611
  44. # # unfitted edit costs.
  45. # c_vi = 3
  46. # c_vr = 3
  47. # c_vs = 1
  48. # c_ei = 3
  49. # c_er = 3
  50. # c_es = 1
  51. ite_max_iam = 50
  52. epsilon_iam = 0.001
  53. removeNodes = False
  54. connected_iam = False
  55. # parameters for IAM function
  56. # ged_cost = 'CONSTANT'
  57. ged_cost = 'CONSTANT'
  58. ged_method = 'IPFP'
  59. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  60. # edit_cost_constant = []
  61. ged_stabilizer = 'min'
  62. ged_repeat = 50
  63. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  64. 'edit_cost_constant': edit_cost_constant,
  65. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  66. # classify graphs according to letters.
  67. time_list = []
  68. dis_ks_min_list = []
  69. dis_ks_set_median_list = []
  70. sod_gs_list = []
  71. g_best = []
  72. sod_set_median_list = []
  73. sod_list_list = []
  74. idx_dict = get_same_item_indices(y_all)
  75. for y_class in idx_dict:
  76. print('\n-------------------------------------------------------')
  77. print('class of y:', y_class)
  78. Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
  79. time_list.append([])
  80. dis_ks_min_list.append([])
  81. dis_ks_set_median_list.append([])
  82. sod_gs_list.append([])
  83. g_best.append([])
  84. sod_set_median_list.append([])
  85. for repeat in range(50):
  86. idx_rdm = random.sample(range(len(Gn_class)), 10)
  87. print('graphs chosen:', idx_rdm)
  88. Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
  89. Gn_candidate = [g.copy() for g in Gn_median]
  90. alpha_range = [1 / len(Gn_median)] * len(Gn_median)
  91. time0 = time.time()
  92. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  93. = iam_upgraded(Gn_median,
  94. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  95. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  96. params_ged=params_ged)
  97. time_total = time.time() - time0
  98. print('\ntime: ', time_total)
  99. time_list[-1].append(time_total)
  100. g_best[-1].append(G_gen_median_list[0])
  101. sod_set_median_list[-1].append(sod_set_median)
  102. print('\nsmallest sod of the set median:', sod_set_median)
  103. sod_gs_list[-1].append(sod_gen_median)
  104. print('\nsmallest sod in graph space:', sod_gen_median)
  105. sod_list_list.append(sod_list)
  106. # show the best graph and save it to file.
  107. print('one of the possible corresponding pre-images is')
  108. nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  109. with_labels=True)
  110. # plt.show()
  111. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  112. # plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) +
  113. # '_repeat' + str(repeat) + '_' + str(time.time()) +
  114. # '.png', format="PNG")
  115. plt.clf()
  116. # print(G_gen_median_list[0].nodes(data=True))
  117. # print(G_gen_median_list[0].edges(data=True))
  118. # compute distance between \psi and the set median graph.
  119. knew_set_median = compute_kernel(G_set_median_list + Gn_median,
  120. gkernel, node_label, edge_label, False)
  121. dhat_new_set_median_list = []
  122. for idx, g_tmp in enumerate(G_set_median_list):
  123. # @todo: the term3 below could use the one at the beginning of the function.
  124. dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list),
  125. len(G_set_median_list) + len(Gn_median) + 1),
  126. alpha_range, knew_set_median, withterm3=False))
  127. print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0])
  128. dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
  129. # compute distance between \psi and the new generated graphs.
  130. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
  131. edge_label, False)
  132. dhat_new_list = []
  133. for idx, g_tmp in enumerate(G_gen_median_list):
  134. # @todo: the term3 below could use the one at the beginning of the function.
  135. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
  136. len(G_gen_median_list) + len(Gn_median) + 1),
  137. alpha_range, knew, withterm3=False))
  138. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  139. dis_ks_min_list[-1].append(dhat_new_list[0])
  140. print('\nsods of the set median for this class:', sod_set_median_list[-1])
  141. print('\nsods in graph space for this class:', sod_gs_list[-1])
  142. print('\ndistance in kernel space of set median for this class:',
  143. dis_ks_set_median_list[-1])
  144. print('\nsmallest distances in kernel space for this class:',
  145. dis_ks_min_list[-1])
  146. print('\ntimes for this class:', time_list[-1])
  147. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  148. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  149. dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
  150. dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  151. time_list[-1] = np.mean(time_list[-1])
  152. print()
  153. print('\nmean sods of the set median for each class:', sod_set_median_list)
  154. print('\nmean sods in graph space for each class:', sod_gs_list)
  155. print('\ndistances in kernel space of set median for each class:',
  156. dis_ks_set_median_list)
  157. print('\nmean smallest distances in kernel space for each class:',
  158. dis_ks_min_list)
  159. print('\nmean times for each class:', time_list)
  160. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  161. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  162. print('\nmean distances in kernel space of set median of all:',
  163. np.mean(dis_ks_set_median_list))
  164. print('\nmean smallest distances in kernel space of all:',
  165. np.mean(dis_ks_min_list))
  166. print('\nmean times of all:', np.mean(time_list))
  167. nb_better_sods = 0
  168. nb_worse_sods = 0
  169. nb_same_sods = 0
  170. for sods in sod_list_list:
  171. if sods[0] > sods[-1]:
  172. nb_better_sods += 1
  173. elif sods[0] < sods[-1]:
  174. nb_worse_sods += 1
  175. else:
  176. nb_same_sods += 1
  177. print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
  178. 'are getting better,', str(nb_worse_sods), 'are getting worse,',
  179. str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)),
  180. 'sods are improved.')
  181. def test_iam_mutag():
  182. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  183. 'extra_params': {}} # node/edge symb
  184. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  185. # Gn = Gn[0:50]
  186. gkernel = 'untilhpathkernel'
  187. node_label = 'atom'
  188. edge_label = 'bond_type'
  189. # parameters for GED function from the IAM paper.
  190. # fitted edit costs.
  191. c_vi = 0.03523843108436513
  192. c_vr = 0.03347339739350128
  193. c_vs = 0.06871290673612238
  194. c_ei = 0.08591999846720685
  195. c_er = 0.07962086440894103
  196. c_es = 0.08596855855478233
  197. # unfitted edit costs.
  198. # c_vi = 3
  199. # c_vr = 3
  200. # c_vs = 1
  201. # c_ei = 3
  202. # c_er = 3
  203. # c_es = 1
  204. ite_max_iam = 50
  205. epsilon_iam = 0.001
  206. removeNodes = False
  207. connected_iam = False
  208. # parameters for IAM function
  209. # ged_cost = 'CONSTANT'
  210. ged_cost = 'CONSTANT'
  211. ged_method = 'IPFP'
  212. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  213. # edit_cost_constant = []
  214. ged_stabilizer = 'min'
  215. ged_repeat = 50
  216. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  217. 'edit_cost_constant': edit_cost_constant,
  218. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  219. # classify graphs according to letters.
  220. time_list = []
  221. dis_ks_min_list = []
  222. dis_ks_set_median_list = []
  223. sod_gs_list = []
  224. g_best = []
  225. sod_set_median_list = []
  226. sod_list_list = []
  227. idx_dict = get_same_item_indices(y_all)
  228. for y_class in idx_dict:
  229. print('\n-------------------------------------------------------')
  230. print('class of y:', y_class)
  231. Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
  232. time_list.append([])
  233. dis_ks_min_list.append([])
  234. dis_ks_set_median_list.append([])
  235. sod_gs_list.append([])
  236. g_best.append([])
  237. sod_set_median_list.append([])
  238. for repeat in range(50):
  239. idx_rdm = random.sample(range(len(Gn_class)), 10)
  240. print('graphs chosen:', idx_rdm)
  241. Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
  242. Gn_candidate = [g.copy() for g in Gn_median]
  243. alpha_range = [1 / len(Gn_median)] * len(Gn_median)
  244. time0 = time.time()
  245. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  246. = iam_upgraded(Gn_median,
  247. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  248. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  249. params_ged=params_ged)
  250. time_total = time.time() - time0
  251. print('\ntime: ', time_total)
  252. time_list[-1].append(time_total)
  253. g_best[-1].append(G_gen_median_list[0])
  254. sod_set_median_list[-1].append(sod_set_median)
  255. print('\nsmallest sod of the set median:', sod_set_median)
  256. sod_gs_list[-1].append(sod_gen_median)
  257. print('\nsmallest sod in graph space:', sod_gen_median)
  258. sod_list_list.append(sod_list)
  259. # show the best graph and save it to file.
  260. print('one of the possible corresponding pre-images is')
  261. nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  262. with_labels=True)
  263. # plt.show()
  264. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  265. # plt.savefig('results/iam/paper_compare/mutag_y' + str(y_class) +
  266. # '_repeat' + str(repeat) + '_' + str(time.time()) +
  267. # '.png', format="PNG")
  268. plt.clf()
  269. # print(G_gen_median_list[0].nodes(data=True))
  270. # print(G_gen_median_list[0].edges(data=True))
  271. # compute distance between \psi and the set median graph.
  272. knew_set_median = compute_kernel(G_set_median_list + Gn_median,
  273. gkernel, node_label, edge_label, False)
  274. dhat_new_set_median_list = []
  275. for idx, g_tmp in enumerate(G_set_median_list):
  276. # @todo: the term3 below could use the one at the beginning of the function.
  277. dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list),
  278. len(G_set_median_list) + len(Gn_median) + 1),
  279. alpha_range, knew_set_median, withterm3=False))
  280. print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0])
  281. dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
  282. # compute distance between \psi and the new generated graphs.
  283. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
  284. edge_label, False)
  285. dhat_new_list = []
  286. for idx, g_tmp in enumerate(G_gen_median_list):
  287. # @todo: the term3 below could use the one at the beginning of the function.
  288. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
  289. len(G_gen_median_list) + len(Gn_median) + 1),
  290. alpha_range, knew, withterm3=False))
  291. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  292. dis_ks_min_list[-1].append(dhat_new_list[0])
  293. print('\nsods of the set median for this class:', sod_set_median_list[-1])
  294. print('\nsods in graph space for this class:', sod_gs_list[-1])
  295. print('\ndistance in kernel space of set median for this class:',
  296. dis_ks_set_median_list[-1])
  297. print('\nsmallest distances in kernel space for this class:',
  298. dis_ks_min_list[-1])
  299. print('\ntimes for this class:', time_list[-1])
  300. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  301. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  302. dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
  303. dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  304. time_list[-1] = np.mean(time_list[-1])
  305. print()
  306. print('\nmean sods of the set median for each class:', sod_set_median_list)
  307. print('\nmean sods in graph space for each class:', sod_gs_list)
  308. print('\ndistances in kernel space of set median for each class:',
  309. dis_ks_set_median_list)
  310. print('\nmean smallest distances in kernel space for each class:',
  311. dis_ks_min_list)
  312. print('\nmean times for each class:', time_list)
  313. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  314. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  315. print('\nmean distances in kernel space of set median of all:',
  316. np.mean(dis_ks_set_median_list))
  317. print('\nmean smallest distances in kernel space of all:',
  318. np.mean(dis_ks_min_list))
  319. print('\nmean times of all:', np.mean(time_list))
  320. nb_better_sods = 0
  321. nb_worse_sods = 0
  322. nb_same_sods = 0
  323. for sods in sod_list_list:
  324. if sods[0] > sods[-1]:
  325. nb_better_sods += 1
  326. elif sods[0] < sods[-1]:
  327. nb_worse_sods += 1
  328. else:
  329. nb_same_sods += 1
  330. print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
  331. 'are getting better,', str(nb_worse_sods), 'are getting worse,',
  332. str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)),
  333. 'sods are improved.')
  334. ###############################################################################
  335. # tests on different numbers of median-sets.
  336. def test_iam_median_nb():
  337. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  338. 'extra_params': {}} # node/edge symb
  339. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  340. # Gn = Gn[0:50]
  341. remove_edges(Gn)
  342. gkernel = 'marginalizedkernel'
  343. lmbda = 0.03 # termination probalility
  344. # # parameters for GED function
  345. # c_vi = 0.037
  346. # c_vr = 0.038
  347. # c_vs = 0.075
  348. # c_ei = 0.001
  349. # c_er = 0.001
  350. # c_es = 0.0
  351. # ite_max_iam = 50
  352. # epsilon_iam = 0.001
  353. # removeNodes = False
  354. # connected_iam = False
  355. # # parameters for IAM function
  356. # ged_cost = 'CONSTANT'
  357. # ged_method = 'IPFP'
  358. # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  359. # ged_stabilizer = 'min'
  360. # ged_repeat = 50
  361. # params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  362. # 'edit_cost_constant': edit_cost_constant,
  363. # 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  364. # parameters for GED function
  365. c_vi = 4
  366. c_vr = 4
  367. c_vs = 2
  368. c_ei = 1
  369. c_er = 1
  370. c_es = 1
  371. ite_max_iam = 50
  372. epsilon_iam = 0.001
  373. removeNodes = False
  374. connected_iam = False
  375. # parameters for IAM function
  376. ged_cost = 'CHEM_1'
  377. ged_method = 'IPFP'
  378. edit_cost_constant = []
  379. ged_stabilizer = 'min'
  380. ged_repeat = 50
  381. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  382. 'edit_cost_constant': edit_cost_constant,
  383. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  384. # find out all the graphs classified to positive group 1.
  385. idx_dict = get_same_item_indices(y_all)
  386. Gn = [Gn[i] for i in idx_dict[1]]
  387. # number of graphs; we what to compute the median of these graphs.
  388. # nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
  389. nb_median_range = [len(Gn)]
  390. # # compute Gram matrix.
  391. # time0 = time.time()
  392. # km = compute_kernel(Gn, gkernel, True)
  393. # time_km = time.time() - time0
  394. # # write Gram matrix to file.
  395. # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
  396. time_list = []
  397. dis_ks_min_list = []
  398. sod_gs_list = []
  399. # sod_gs_min_list = []
  400. # nb_updated_list = []
  401. # nb_updated_k_list = []
  402. g_best = []
  403. for nb_median in nb_median_range:
  404. print('\n-------------------------------------------------------')
  405. print('number of median graphs =', nb_median)
  406. random.seed(1)
  407. idx_rdm = random.sample(range(len(Gn)), nb_median)
  408. print('graphs chosen:', idx_rdm)
  409. Gn_median = [Gn[idx].copy() for idx in idx_rdm]
  410. Gn_candidate = [g.copy() for g in Gn]
  411. # for g in Gn_median:
  412. # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
  413. ## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
  414. # plt.show()
  415. # plt.clf()
  416. ###################################################################
  417. # gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
  418. # km_tmp = gmfile['gm']
  419. # time_km = gmfile['gmtime']
  420. # # modify mixed gram matrix.
  421. # km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
  422. # for i in range(len(Gn)):
  423. # for j in range(i, len(Gn)):
  424. # km[i, j] = km_tmp[i, j]
  425. # km[j, i] = km[i, j]
  426. # for i in range(len(Gn)):
  427. # for j, idx in enumerate(idx_rdm):
  428. # km[i, len(Gn) + j] = km[i, idx]
  429. # km[len(Gn) + j, i] = km[i, idx]
  430. # for i, idx1 in enumerate(idx_rdm):
  431. # for j, idx2 in enumerate(idx_rdm):
  432. # km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
  433. ###################################################################
  434. alpha_range = [1 / nb_median] * nb_median
  435. time0 = time.time()
  436. ghat_new_list, sod_min = iam_upgraded(Gn_median, Gn_candidate,
  437. c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  438. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  439. params_ged=params_ged)
  440. time_total = time.time() - time0
  441. print('\ntime: ', time_total)
  442. time_list.append(time_total)
  443. # compute distance between \psi and the new generated graphs.
  444. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
  445. dhat_new_list = []
  446. for idx, g_tmp in enumerate(ghat_new_list):
  447. # @todo: the term3 below could use the one at the beginning of the function.
  448. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
  449. len(ghat_new_list) + len(Gn_median) + 1),
  450. alpha_range, knew, withterm3=False))
  451. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  452. dis_ks_min_list.append(dhat_new_list[0])
  453. g_best.append(ghat_new_list[0])
  454. # show the best graph and save it to file.
  455. # print('the shortest distance is', dhat)
  456. print('one of the possible corresponding pre-images is')
  457. nx.draw(ghat_new_list[0], labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
  458. with_labels=True)
  459. plt.show()
  460. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  461. plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
  462. '.png', format="PNG")
  463. plt.clf()
  464. # print(ghat_list[0].nodes(data=True))
  465. # print(ghat_list[0].edges(data=True))
  466. sod_gs_list.append(sod_min)
  467. # sod_gs_min_list.append(np.min(sod_min))
  468. print('\nsmallest sod in graph space: ', sod_min)
  469. print('\nsods in graph space: ', sod_gs_list)
  470. # print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
  471. print('\nsmallest distance in kernel space for each set of median graphs: ',
  472. dis_ks_min_list)
  473. # print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
  474. # nb_updated_list)
  475. # print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
  476. # nb_updated_k_list)
  477. print('\ntimes:', time_list)
  478. def test_iam_letter_h():
  479. from median import draw_Letter_graph
  480. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  481. 'extra_params': {}} # node nsymb
  482. # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
  483. # 'extra_params': {}} # node nsymb
  484. # Gn = Gn[0:50]
  485. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  486. gkernel = 'structuralspkernel'
  487. # parameters for GED function from the IAM paper.
  488. c_vi = 3
  489. c_vr = 3
  490. c_vs = 1
  491. c_ei = 3
  492. c_er = 3
  493. c_es = 1
  494. ite_max_iam = 50
  495. epsilon_iam = 0.001
  496. removeNodes = False
  497. connected_iam = False
  498. # parameters for IAM function
  499. # ged_cost = 'CONSTANT'
  500. ged_cost = 'LETTER'
  501. ged_method = 'IPFP'
  502. # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  503. edit_cost_constant = []
  504. ged_stabilizer = 'min'
  505. ged_repeat = 50
  506. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  507. 'edit_cost_constant': edit_cost_constant,
  508. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  509. # classify graphs according to letters.
  510. time_list = []
  511. dis_ks_min_list = []
  512. sod_gs_list = []
  513. g_best = []
  514. sod_set_median_list = []
  515. idx_dict = get_same_item_indices(y_all)
  516. for letter in idx_dict:
  517. print('\n-------------------------------------------------------')
  518. print('letter', letter)
  519. Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
  520. time_list.append([])
  521. dis_ks_min_list.append([])
  522. sod_gs_list.append([])
  523. g_best.append([])
  524. sod_set_median_list.append([])
  525. for repeat in range(50):
  526. idx_rdm = random.sample(range(len(Gn_let)), 50)
  527. print('graphs chosen:', idx_rdm)
  528. Gn_median = [Gn_let[idx].copy() for idx in idx_rdm]
  529. Gn_candidate = [g.copy() for g in Gn_median]
  530. alpha_range = [1 / len(Gn_median)] * len(Gn_median)
  531. time0 = time.time()
  532. ghat_new_list, sod_min, sod_set_median = iam_upgraded(Gn_median,
  533. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  534. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  535. params_ged=params_ged)
  536. time_total = time.time() - time0
  537. print('\ntime: ', time_total)
  538. time_list[-1].append(time_total)
  539. g_best[-1].append(ghat_new_list[0])
  540. sod_set_median_list[-1].append(sod_set_median)
  541. print('\nsmallest sod of the set median:', sod_set_median)
  542. sod_gs_list[-1].append(sod_min)
  543. print('\nsmallest sod in graph space:', sod_min)
  544. # show the best graph and save it to file.
  545. print('one of the possible corresponding pre-images is')
  546. draw_Letter_graph(ghat_new_list[0], savepath='results/iam/paper_compare/')
  547. # compute distance between \psi and the new generated graphs.
  548. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
  549. dhat_new_list = []
  550. for idx, g_tmp in enumerate(ghat_new_list):
  551. # @todo: the term3 below could use the one at the beginning of the function.
  552. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
  553. len(ghat_new_list) + len(Gn_median) + 1),
  554. alpha_range, knew, withterm3=False))
  555. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  556. dis_ks_min_list[-1].append(dhat_new_list[0])
  557. print('\nsods of the set median for this letter:', sod_set_median_list[-1])
  558. print('\nsods in graph space for this letter:', sod_gs_list[-1])
  559. print('\nsmallest distances in kernel space for this letter:',
  560. dis_ks_min_list[-1])
  561. print('\ntimes for this letter:', time_list[-1])
  562. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  563. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  564. dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  565. time_list[-1] = np.mean(time_list[-1])
  566. print('\nmean sods of the set median for each letter:', sod_set_median_list)
  567. print('\nmean sods in graph space for each letter:', sod_gs_list)
  568. print('\nmean smallest distances in kernel space for each letter:',
  569. dis_ks_min_list)
  570. print('\nmean times for each letter:', time_list)
  571. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  572. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  573. print('\nmean smallest distances in kernel space of all:',
  574. np.mean(dis_ks_min_list))
  575. print('\nmean times of all:', np.mean(time_list))
  576. def test_iam_fitdistance():
  577. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  578. 'extra_params': {}} # node/edge symb
  579. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  580. # Gn = Gn[0:50]
  581. # remove_edges(Gn)
  582. gkernel = 'marginalizedkernel'
  583. node_label = 'atom'
  584. edge_label = 'bond_type'
  585. # lmbda = 0.03 # termination probalility
  586. # # parameters for GED function
  587. # c_vi = 0.037
  588. # c_vr = 0.038
  589. # c_vs = 0.075
  590. # c_ei = 0.001
  591. # c_er = 0.001
  592. # c_es = 0.0
  593. # ite_max_iam = 50
  594. # epsilon_iam = 0.001
  595. # removeNodes = False
  596. # connected_iam = False
  597. # # parameters for IAM function
  598. # ged_cost = 'CONSTANT'
  599. # ged_method = 'IPFP'
  600. # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  601. # ged_stabilizer = 'min'
  602. # ged_repeat = 50
  603. # params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  604. # 'edit_cost_constant': edit_cost_constant,
  605. # 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  606. # parameters for GED function
  607. c_vi = 4
  608. c_vr = 4
  609. c_vs = 2
  610. c_ei = 1
  611. c_er = 1
  612. c_es = 1
  613. ite_max_iam = 50
  614. epsilon_iam = 0.001
  615. removeNodes = False
  616. connected_iam = False
  617. # parameters for IAM function
  618. ged_cost = 'CHEM_1'
  619. ged_method = 'IPFP'
  620. edit_cost_constant = []
  621. ged_stabilizer = 'min'
  622. ged_repeat = 50
  623. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  624. 'edit_cost_constant': edit_cost_constant,
  625. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  626. # find out all the graphs classified to positive group 1.
  627. idx_dict = get_same_item_indices(y_all)
  628. Gn = [Gn[i] for i in idx_dict[1]]
  629. # number of graphs; we what to compute the median of these graphs.
  630. # nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
  631. nb_median_range = [10]
  632. # # compute Gram matrix.
  633. # time0 = time.time()
  634. # km = compute_kernel(Gn, gkernel, True)
  635. # time_km = time.time() - time0
  636. # # write Gram matrix to file.
  637. # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
  638. time_list = []
  639. dis_ks_min_list = []
  640. dis_ks_gen_median_list = []
  641. sod_gs_list = []
  642. # sod_gs_min_list = []
  643. # nb_updated_list = []
  644. # nb_updated_k_list = []
  645. g_best = []
  646. for nb_median in nb_median_range:
  647. print('\n-------------------------------------------------------')
  648. print('number of median graphs =', nb_median)
  649. random.seed(1)
  650. idx_rdm = random.sample(range(len(Gn)), nb_median)
  651. print('graphs chosen:', idx_rdm)
  652. Gn_median = [Gn[idx].copy() for idx in idx_rdm]
  653. Gn_candidate = [g.copy() for g in Gn_median]
  654. # for g in Gn_median:
  655. # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
  656. ## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
  657. # plt.show()
  658. # plt.clf()
  659. ###################################################################
  660. # gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
  661. # km_tmp = gmfile['gm']
  662. # time_km = gmfile['gmtime']
  663. # # modify mixed gram matrix.
  664. # km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
  665. # for i in range(len(Gn)):
  666. # for j in range(i, len(Gn)):
  667. # km[i, j] = km_tmp[i, j]
  668. # km[j, i] = km[i, j]
  669. # for i in range(len(Gn)):
  670. # for j, idx in enumerate(idx_rdm):
  671. # km[i, len(Gn) + j] = km[i, idx]
  672. # km[len(Gn) + j, i] = km[i, idx]
  673. # for i, idx1 in enumerate(idx_rdm):
  674. # for j, idx2 in enumerate(idx_rdm):
  675. # km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
  676. ###################################################################
  677. alpha_range = [1 / nb_median] * nb_median
  678. time0 = time.time()
  679. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  680. = iam_upgraded(Gn_median, Gn_candidate,
  681. c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  682. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  683. params_ged=params_ged)
  684. time_total = time.time() - time0
  685. print('\ntime: ', time_total)
  686. time_list.append(time_total)
  687. # compute distance between \psi and the new generated graphs.
  688. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
  689. edge_label, False)
  690. dhat_new_list = []
  691. for idx, g_tmp in enumerate(G_gen_median_list):
  692. # @todo: the term3 below could use the one at the beginning of the function.
  693. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
  694. len(G_gen_median_list) + len(Gn_median) + 1),
  695. alpha_range, knew, withterm3=False))
  696. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  697. dis_ks_min_list.append(dhat_new_list[0])
  698. g_best.append(G_gen_median_list[0])
  699. # show the best graph and save it to file.
  700. # print('the shortest distance is', dhat)
  701. print('one of the possible corresponding pre-images is')
  702. nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  703. with_labels=True)
  704. plt.show()
  705. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  706. # plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
  707. # '.png', format="PNG")
  708. plt.clf()
  709. # print(ghat_list[0].nodes(data=True))
  710. # print(ghat_list[0].edges(data=True))
  711. sod_gs_list.append(sod_gen_median)
  712. # sod_gs_min_list.append(np.min(sod_gen_median))
  713. print('\nsmallest sod in graph space: ', sod_gen_median)
  714. print('\nsmallest sod of set median in graph space: ', sod_set_median)
  715. print('\nsods in graph space: ', sod_gs_list)
  716. # print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
  717. print('\nsmallest distance in kernel space for each set of median graphs: ',
  718. dis_ks_min_list)
  719. # print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
  720. # nb_updated_list)
  721. # print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
  722. # nb_updated_k_list)
  723. print('\ntimes:', time_list)
  724. ###############################################################################
  725. if __name__ == '__main__':
  726. ###############################################################################
  727. # tests on different numbers of median-sets.
  728. # test_iam_median_nb()
  729. # test_iam_letter_h()
  730. test_iam_monoterpenoides()
  731. # test_iam_mutag()
  732. # test_iam_fitdistance()
  733. # print("test log")

A Python package for graph kernels, graph edit distances and graph pre-image problem.