You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

test_iam.py 40 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Sep 5 15:59:00 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import networkx as nx
  9. import matplotlib.pyplot as plt
  10. import time
  11. import random
  12. #from tqdm import tqdm
  13. #import os
  14. import sys
  15. sys.path.insert(0, "../")
  16. from pygraph.utils.graphfiles import loadDataset
  17. #from pygraph.utils.logger2file import *
  18. from iam import iam_upgraded
  19. from utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar
  20. #from ged import ged_median
  21. def test_iam_monoterpenoides_with_init40():
  22. gkernel = 'untilhpathkernel'
  23. node_label = 'atom'
  24. edge_label = 'bond_type'
  25. # unfitted edit costs.
  26. c_vi = 3
  27. c_vr = 3
  28. c_vs = 1
  29. c_ei = 3
  30. c_er = 3
  31. c_es = 1
  32. ite_max_iam = 50
  33. epsilon_iam = 0.0001
  34. removeNodes = False
  35. connected_iam = False
  36. # parameters for IAM function
  37. # ged_cost = 'CONSTANT'
  38. ged_cost = 'CONSTANT'
  39. ged_method = 'IPFP'
  40. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  41. ged_stabilizer = None
  42. # ged_repeat = 50
  43. algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
  44. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  45. 'edit_cost_constant': edit_cost_constant,
  46. 'algo_options': algo_options,
  47. 'stabilizer': ged_stabilizer}
  48. collection_path = '/media/ljia/DATA/research-repo/codes/others/gedlib/tests_linlin/generated_datsets/monoterpenoides/'
  49. graph_dir = collection_path + 'gxl/'
  50. y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
  51. repeats = 50
  52. # classify graphs according to classes.
  53. time_list = []
  54. dis_ks_min_list = []
  55. dis_ks_set_median_list = []
  56. sod_gs_list = []
  57. g_best = []
  58. sod_set_median_list = []
  59. sod_list_list = []
  60. for y in y_all:
  61. print('\n-------------------------------------------------------')
  62. print('class of y:', y)
  63. time_list.append([])
  64. dis_ks_min_list.append([])
  65. dis_ks_set_median_list.append([])
  66. sod_gs_list.append([])
  67. g_best.append([])
  68. sod_set_median_list.append([])
  69. for repeat in range(repeats):
  70. # load median set.
  71. collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
  72. Gn_median, _ = loadDataset(collection_file, extra_params=graph_dir)
  73. Gn_candidate = [g.copy() for g in Gn_median]
  74. time0 = time.time()
  75. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  76. = iam_upgraded(Gn_median,
  77. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  78. epsilon=epsilon_iam, node_label=node_label, edge_label=edge_label,
  79. connected=connected_iam, removeNodes=removeNodes,
  80. params_ged=params_ged)
  81. time_total = time.time() - time0
  82. print('\ntime: ', time_total)
  83. time_list[-1].append(time_total)
  84. g_best[-1].append(G_gen_median_list[0])
  85. sod_set_median_list[-1].append(sod_set_median)
  86. print('\nsmallest sod of the set median:', sod_set_median)
  87. sod_gs_list[-1].append(sod_gen_median)
  88. print('\nsmallest sod in graph space:', sod_gen_median)
  89. sod_list_list.append(sod_list)
  90. # # show the best graph and save it to file.
  91. # print('one of the possible corresponding pre-images is')
  92. # nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  93. # with_labels=True)
  94. ## plt.show()
  95. # # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  96. ## plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) +
  97. ## '_repeat' + str(repeat) + '_' + str(time.time()) +
  98. ## '.png', format="PNG")
  99. # plt.clf()
  100. # # print(G_gen_median_list[0].nodes(data=True))
  101. # # print(G_gen_median_list[0].edges(data=True))
  102. print('\nsods of the set median for this class:', sod_set_median_list[-1])
  103. print('\nsods in graph space for this class:', sod_gs_list[-1])
  104. # print('\ndistance in kernel space of set median for this class:',
  105. # dis_ks_set_median_list[-1])
  106. # print('\nsmallest distances in kernel space for this class:',
  107. # dis_ks_min_list[-1])
  108. print('\ntimes for this class:', time_list[-1])
  109. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  110. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  111. # dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
  112. # dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  113. time_list[-1] = np.mean(time_list[-1])
  114. print()
  115. print('\nmean sods of the set median for each class:', sod_set_median_list)
  116. print('\nmean sods in graph space for each class:', sod_gs_list)
  117. # print('\ndistances in kernel space of set median for each class:',
  118. # dis_ks_set_median_list)
  119. # print('\nmean smallest distances in kernel space for each class:',
  120. # dis_ks_min_list)
  121. print('\nmean times for each class:', time_list)
  122. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  123. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  124. # print('\nmean distances in kernel space of set median of all:',
  125. # np.mean(dis_ks_set_median_list))
  126. # print('\nmean smallest distances in kernel space of all:',
  127. # np.mean(dis_ks_min_list))
  128. print('\nmean times of all:', np.mean(time_list))
  129. def test_iam_monoterpenoides():
  130. ds = {'name': 'monoterpenoides',
  131. 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
  132. Gn, y_all = loadDataset(ds['dataset'])
  133. # Gn = Gn[0:50]
  134. gkernel = 'untilhpathkernel'
  135. node_label = 'atom'
  136. edge_label = 'bond_type'
  137. # parameters for GED function from the IAM paper.
  138. # fitted edit costs (Gaussian).
  139. c_vi = 0.03620133402089074
  140. c_vr = 0.0417574590207099
  141. c_vs = 0.009992282328587499
  142. c_ei = 0.08293120042342755
  143. c_er = 0.09512220476358019
  144. c_es = 0.09222529696841467
  145. # # fitted edit costs (linear combinations).
  146. # c_vi = 0.1749684054238749
  147. # c_vr = 0.0734054228711457
  148. # c_vs = 0.05017781726016715
  149. # c_ei = 0.1869431164806936
  150. # c_er = 0.32055856948274
  151. # c_es = 0.2569469379247611
  152. # # unfitted edit costs.
  153. # c_vi = 3
  154. # c_vr = 3
  155. # c_vs = 1
  156. # c_ei = 3
  157. # c_er = 3
  158. # c_es = 1
  159. ite_max_iam = 50
  160. epsilon_iam = 0.001
  161. removeNodes = False
  162. connected_iam = False
  163. # parameters for IAM function
  164. # ged_cost = 'CONSTANT'
  165. ged_cost = 'CONSTANT'
  166. ged_method = 'IPFP'
  167. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  168. # edit_cost_constant = []
  169. ged_stabilizer = 'min'
  170. ged_repeat = 50
  171. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  172. 'edit_cost_constant': edit_cost_constant,
  173. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  174. # classify graphs according to letters.
  175. time_list = []
  176. dis_ks_min_list = []
  177. dis_ks_set_median_list = []
  178. sod_gs_list = []
  179. g_best = []
  180. sod_set_median_list = []
  181. sod_list_list = []
  182. idx_dict = get_same_item_indices(y_all)
  183. for y_class in idx_dict:
  184. print('\n-------------------------------------------------------')
  185. print('class of y:', y_class)
  186. Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
  187. time_list.append([])
  188. dis_ks_min_list.append([])
  189. dis_ks_set_median_list.append([])
  190. sod_gs_list.append([])
  191. g_best.append([])
  192. sod_set_median_list.append([])
  193. for repeat in range(50):
  194. idx_rdm = random.sample(range(len(Gn_class)), 10)
  195. print('graphs chosen:', idx_rdm)
  196. Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
  197. Gn_candidate = [g.copy() for g in Gn_median]
  198. alpha_range = [1 / len(Gn_median)] * len(Gn_median)
  199. time0 = time.time()
  200. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  201. = iam_upgraded(Gn_median,
  202. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  203. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  204. params_ged=params_ged)
  205. time_total = time.time() - time0
  206. print('\ntime: ', time_total)
  207. time_list[-1].append(time_total)
  208. g_best[-1].append(G_gen_median_list[0])
  209. sod_set_median_list[-1].append(sod_set_median)
  210. print('\nsmallest sod of the set median:', sod_set_median)
  211. sod_gs_list[-1].append(sod_gen_median)
  212. print('\nsmallest sod in graph space:', sod_gen_median)
  213. sod_list_list.append(sod_list)
  214. # show the best graph and save it to file.
  215. print('one of the possible corresponding pre-images is')
  216. nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  217. with_labels=True)
  218. # plt.show()
  219. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  220. # plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) +
  221. # '_repeat' + str(repeat) + '_' + str(time.time()) +
  222. # '.png', format="PNG")
  223. plt.clf()
  224. # print(G_gen_median_list[0].nodes(data=True))
  225. # print(G_gen_median_list[0].edges(data=True))
  226. # compute distance between \psi and the set median graph.
  227. knew_set_median = compute_kernel(G_set_median_list + Gn_median,
  228. gkernel, node_label, edge_label, False)
  229. dhat_new_set_median_list = []
  230. for idx, g_tmp in enumerate(G_set_median_list):
  231. # @todo: the term3 below could use the one at the beginning of the function.
  232. dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list),
  233. len(G_set_median_list) + len(Gn_median) + 1),
  234. alpha_range, knew_set_median, withterm3=False))
  235. print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0])
  236. dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
  237. # compute distance between \psi and the new generated graphs.
  238. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
  239. edge_label, False)
  240. dhat_new_list = []
  241. for idx, g_tmp in enumerate(G_gen_median_list):
  242. # @todo: the term3 below could use the one at the beginning of the function.
  243. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
  244. len(G_gen_median_list) + len(Gn_median) + 1),
  245. alpha_range, knew, withterm3=False))
  246. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  247. dis_ks_min_list[-1].append(dhat_new_list[0])
  248. print('\nsods of the set median for this class:', sod_set_median_list[-1])
  249. print('\nsods in graph space for this class:', sod_gs_list[-1])
  250. print('\ndistance in kernel space of set median for this class:',
  251. dis_ks_set_median_list[-1])
  252. print('\nsmallest distances in kernel space for this class:',
  253. dis_ks_min_list[-1])
  254. print('\ntimes for this class:', time_list[-1])
  255. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  256. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  257. dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
  258. dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  259. time_list[-1] = np.mean(time_list[-1])
  260. print()
  261. print('\nmean sods of the set median for each class:', sod_set_median_list)
  262. print('\nmean sods in graph space for each class:', sod_gs_list)
  263. print('\ndistances in kernel space of set median for each class:',
  264. dis_ks_set_median_list)
  265. print('\nmean smallest distances in kernel space for each class:',
  266. dis_ks_min_list)
  267. print('\nmean times for each class:', time_list)
  268. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  269. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  270. print('\nmean distances in kernel space of set median of all:',
  271. np.mean(dis_ks_set_median_list))
  272. print('\nmean smallest distances in kernel space of all:',
  273. np.mean(dis_ks_min_list))
  274. print('\nmean times of all:', np.mean(time_list))
  275. nb_better_sods = 0
  276. nb_worse_sods = 0
  277. nb_same_sods = 0
  278. for sods in sod_list_list:
  279. if sods[0] > sods[-1]:
  280. nb_better_sods += 1
  281. elif sods[0] < sods[-1]:
  282. nb_worse_sods += 1
  283. else:
  284. nb_same_sods += 1
  285. print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
  286. 'are getting better,', str(nb_worse_sods), 'are getting worse,',
  287. str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)),
  288. 'sods are improved.')
  289. def test_iam_mutag():
  290. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  291. 'extra_params': {}} # node/edge symb
  292. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  293. # Gn = Gn[0:50]
  294. gkernel = 'untilhpathkernel'
  295. node_label = 'atom'
  296. edge_label = 'bond_type'
  297. # parameters for GED function from the IAM paper.
  298. # fitted edit costs.
  299. c_vi = 0.03523843108436513
  300. c_vr = 0.03347339739350128
  301. c_vs = 0.06871290673612238
  302. c_ei = 0.08591999846720685
  303. c_er = 0.07962086440894103
  304. c_es = 0.08596855855478233
  305. # unfitted edit costs.
  306. # c_vi = 3
  307. # c_vr = 3
  308. # c_vs = 1
  309. # c_ei = 3
  310. # c_er = 3
  311. # c_es = 1
  312. ite_max_iam = 50
  313. epsilon_iam = 0.001
  314. removeNodes = False
  315. connected_iam = False
  316. # parameters for IAM function
  317. # ged_cost = 'CONSTANT'
  318. ged_cost = 'CONSTANT'
  319. ged_method = 'IPFP'
  320. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  321. # edit_cost_constant = []
  322. ged_stabilizer = 'min'
  323. ged_repeat = 50
  324. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  325. 'edit_cost_constant': edit_cost_constant,
  326. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  327. # classify graphs according to letters.
  328. time_list = []
  329. dis_ks_min_list = []
  330. dis_ks_set_median_list = []
  331. sod_gs_list = []
  332. g_best = []
  333. sod_set_median_list = []
  334. sod_list_list = []
  335. idx_dict = get_same_item_indices(y_all)
  336. for y_class in idx_dict:
  337. print('\n-------------------------------------------------------')
  338. print('class of y:', y_class)
  339. Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
  340. time_list.append([])
  341. dis_ks_min_list.append([])
  342. dis_ks_set_median_list.append([])
  343. sod_gs_list.append([])
  344. g_best.append([])
  345. sod_set_median_list.append([])
  346. for repeat in range(50):
  347. idx_rdm = random.sample(range(len(Gn_class)), 10)
  348. print('graphs chosen:', idx_rdm)
  349. Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
  350. Gn_candidate = [g.copy() for g in Gn_median]
  351. alpha_range = [1 / len(Gn_median)] * len(Gn_median)
  352. time0 = time.time()
  353. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  354. = iam_upgraded(Gn_median,
  355. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  356. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  357. params_ged=params_ged)
  358. time_total = time.time() - time0
  359. print('\ntime: ', time_total)
  360. time_list[-1].append(time_total)
  361. g_best[-1].append(G_gen_median_list[0])
  362. sod_set_median_list[-1].append(sod_set_median)
  363. print('\nsmallest sod of the set median:', sod_set_median)
  364. sod_gs_list[-1].append(sod_gen_median)
  365. print('\nsmallest sod in graph space:', sod_gen_median)
  366. sod_list_list.append(sod_list)
  367. # show the best graph and save it to file.
  368. print('one of the possible corresponding pre-images is')
  369. nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  370. with_labels=True)
  371. # plt.show()
  372. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  373. # plt.savefig('results/iam/paper_compare/mutag_y' + str(y_class) +
  374. # '_repeat' + str(repeat) + '_' + str(time.time()) +
  375. # '.png', format="PNG")
  376. plt.clf()
  377. # print(G_gen_median_list[0].nodes(data=True))
  378. # print(G_gen_median_list[0].edges(data=True))
  379. # compute distance between \psi and the set median graph.
  380. knew_set_median = compute_kernel(G_set_median_list + Gn_median,
  381. gkernel, node_label, edge_label, False)
  382. dhat_new_set_median_list = []
  383. for idx, g_tmp in enumerate(G_set_median_list):
  384. # @todo: the term3 below could use the one at the beginning of the function.
  385. dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list),
  386. len(G_set_median_list) + len(Gn_median) + 1),
  387. alpha_range, knew_set_median, withterm3=False))
  388. print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0])
  389. dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
  390. # compute distance between \psi and the new generated graphs.
  391. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
  392. edge_label, False)
  393. dhat_new_list = []
  394. for idx, g_tmp in enumerate(G_gen_median_list):
  395. # @todo: the term3 below could use the one at the beginning of the function.
  396. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
  397. len(G_gen_median_list) + len(Gn_median) + 1),
  398. alpha_range, knew, withterm3=False))
  399. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  400. dis_ks_min_list[-1].append(dhat_new_list[0])
  401. print('\nsods of the set median for this class:', sod_set_median_list[-1])
  402. print('\nsods in graph space for this class:', sod_gs_list[-1])
  403. print('\ndistance in kernel space of set median for this class:',
  404. dis_ks_set_median_list[-1])
  405. print('\nsmallest distances in kernel space for this class:',
  406. dis_ks_min_list[-1])
  407. print('\ntimes for this class:', time_list[-1])
  408. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  409. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  410. dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
  411. dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  412. time_list[-1] = np.mean(time_list[-1])
  413. print()
  414. print('\nmean sods of the set median for each class:', sod_set_median_list)
  415. print('\nmean sods in graph space for each class:', sod_gs_list)
  416. print('\ndistances in kernel space of set median for each class:',
  417. dis_ks_set_median_list)
  418. print('\nmean smallest distances in kernel space for each class:',
  419. dis_ks_min_list)
  420. print('\nmean times for each class:', time_list)
  421. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  422. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  423. print('\nmean distances in kernel space of set median of all:',
  424. np.mean(dis_ks_set_median_list))
  425. print('\nmean smallest distances in kernel space of all:',
  426. np.mean(dis_ks_min_list))
  427. print('\nmean times of all:', np.mean(time_list))
  428. nb_better_sods = 0
  429. nb_worse_sods = 0
  430. nb_same_sods = 0
  431. for sods in sod_list_list:
  432. if sods[0] > sods[-1]:
  433. nb_better_sods += 1
  434. elif sods[0] < sods[-1]:
  435. nb_worse_sods += 1
  436. else:
  437. nb_same_sods += 1
  438. print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
  439. 'are getting better,', str(nb_worse_sods), 'are getting worse,',
  440. str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)),
  441. 'sods are improved.')
  442. ###############################################################################
  443. # tests on different numbers of median-sets.
  444. def test_iam_median_nb():
  445. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  446. 'extra_params': {}} # node/edge symb
  447. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  448. # Gn = Gn[0:50]
  449. remove_edges(Gn)
  450. gkernel = 'marginalizedkernel'
  451. lmbda = 0.03 # termination probalility
  452. # # parameters for GED function
  453. # c_vi = 0.037
  454. # c_vr = 0.038
  455. # c_vs = 0.075
  456. # c_ei = 0.001
  457. # c_er = 0.001
  458. # c_es = 0.0
  459. # ite_max_iam = 50
  460. # epsilon_iam = 0.001
  461. # removeNodes = False
  462. # connected_iam = False
  463. # # parameters for IAM function
  464. # ged_cost = 'CONSTANT'
  465. # ged_method = 'IPFP'
  466. # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  467. # ged_stabilizer = 'min'
  468. # ged_repeat = 50
  469. # params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  470. # 'edit_cost_constant': edit_cost_constant,
  471. # 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  472. # parameters for GED function
  473. c_vi = 4
  474. c_vr = 4
  475. c_vs = 2
  476. c_ei = 1
  477. c_er = 1
  478. c_es = 1
  479. ite_max_iam = 50
  480. epsilon_iam = 0.001
  481. removeNodes = False
  482. connected_iam = False
  483. # parameters for IAM function
  484. ged_cost = 'CHEM_1'
  485. ged_method = 'IPFP'
  486. edit_cost_constant = []
  487. ged_stabilizer = 'min'
  488. ged_repeat = 50
  489. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  490. 'edit_cost_constant': edit_cost_constant,
  491. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  492. # find out all the graphs classified to positive group 1.
  493. idx_dict = get_same_item_indices(y_all)
  494. Gn = [Gn[i] for i in idx_dict[1]]
  495. # number of graphs; we what to compute the median of these graphs.
  496. # nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
  497. nb_median_range = [len(Gn)]
  498. # # compute Gram matrix.
  499. # time0 = time.time()
  500. # km = compute_kernel(Gn, gkernel, True)
  501. # time_km = time.time() - time0
  502. # # write Gram matrix to file.
  503. # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
  504. time_list = []
  505. dis_ks_min_list = []
  506. sod_gs_list = []
  507. # sod_gs_min_list = []
  508. # nb_updated_list = []
  509. # nb_updated_k_list = []
  510. g_best = []
  511. for nb_median in nb_median_range:
  512. print('\n-------------------------------------------------------')
  513. print('number of median graphs =', nb_median)
  514. random.seed(1)
  515. idx_rdm = random.sample(range(len(Gn)), nb_median)
  516. print('graphs chosen:', idx_rdm)
  517. Gn_median = [Gn[idx].copy() for idx in idx_rdm]
  518. Gn_candidate = [g.copy() for g in Gn]
  519. # for g in Gn_median:
  520. # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
  521. ## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
  522. # plt.show()
  523. # plt.clf()
  524. ###################################################################
  525. # gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
  526. # km_tmp = gmfile['gm']
  527. # time_km = gmfile['gmtime']
  528. # # modify mixed gram matrix.
  529. # km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
  530. # for i in range(len(Gn)):
  531. # for j in range(i, len(Gn)):
  532. # km[i, j] = km_tmp[i, j]
  533. # km[j, i] = km[i, j]
  534. # for i in range(len(Gn)):
  535. # for j, idx in enumerate(idx_rdm):
  536. # km[i, len(Gn) + j] = km[i, idx]
  537. # km[len(Gn) + j, i] = km[i, idx]
  538. # for i, idx1 in enumerate(idx_rdm):
  539. # for j, idx2 in enumerate(idx_rdm):
  540. # km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
  541. ###################################################################
  542. alpha_range = [1 / nb_median] * nb_median
  543. time0 = time.time()
  544. ghat_new_list, sod_min = iam_upgraded(Gn_median, Gn_candidate,
  545. c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  546. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  547. params_ged=params_ged)
  548. time_total = time.time() - time0
  549. print('\ntime: ', time_total)
  550. time_list.append(time_total)
  551. # compute distance between \psi and the new generated graphs.
  552. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
  553. dhat_new_list = []
  554. for idx, g_tmp in enumerate(ghat_new_list):
  555. # @todo: the term3 below could use the one at the beginning of the function.
  556. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
  557. len(ghat_new_list) + len(Gn_median) + 1),
  558. alpha_range, knew, withterm3=False))
  559. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  560. dis_ks_min_list.append(dhat_new_list[0])
  561. g_best.append(ghat_new_list[0])
  562. # show the best graph and save it to file.
  563. # print('the shortest distance is', dhat)
  564. print('one of the possible corresponding pre-images is')
  565. nx.draw(ghat_new_list[0], labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
  566. with_labels=True)
  567. plt.show()
  568. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  569. plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
  570. '.png', format="PNG")
  571. plt.clf()
  572. # print(ghat_list[0].nodes(data=True))
  573. # print(ghat_list[0].edges(data=True))
  574. sod_gs_list.append(sod_min)
  575. # sod_gs_min_list.append(np.min(sod_min))
  576. print('\nsmallest sod in graph space: ', sod_min)
  577. print('\nsods in graph space: ', sod_gs_list)
  578. # print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
  579. print('\nsmallest distance in kernel space for each set of median graphs: ',
  580. dis_ks_min_list)
  581. # print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
  582. # nb_updated_list)
  583. # print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
  584. # nb_updated_k_list)
  585. print('\ntimes:', time_list)
  586. def test_iam_letter_h():
  587. from median import draw_Letter_graph
  588. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  589. 'extra_params': {}} # node nsymb
  590. # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
  591. # 'extra_params': {}} # node nsymb
  592. # Gn = Gn[0:50]
  593. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  594. gkernel = 'structuralspkernel'
  595. # parameters for GED function from the IAM paper.
  596. c_vi = 3
  597. c_vr = 3
  598. c_vs = 1
  599. c_ei = 3
  600. c_er = 3
  601. c_es = 1
  602. ite_max_iam = 50
  603. epsilon_iam = 0.001
  604. removeNodes = False
  605. connected_iam = False
  606. # parameters for IAM function
  607. # ged_cost = 'CONSTANT'
  608. ged_cost = 'LETTER'
  609. ged_method = 'IPFP'
  610. # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  611. edit_cost_constant = []
  612. ged_stabilizer = 'min'
  613. ged_repeat = 50
  614. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  615. 'edit_cost_constant': edit_cost_constant,
  616. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  617. # classify graphs according to letters.
  618. time_list = []
  619. dis_ks_min_list = []
  620. sod_gs_list = []
  621. g_best = []
  622. sod_set_median_list = []
  623. idx_dict = get_same_item_indices(y_all)
  624. for letter in idx_dict:
  625. print('\n-------------------------------------------------------')
  626. print('letter', letter)
  627. Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
  628. time_list.append([])
  629. dis_ks_min_list.append([])
  630. sod_gs_list.append([])
  631. g_best.append([])
  632. sod_set_median_list.append([])
  633. for repeat in range(50):
  634. idx_rdm = random.sample(range(len(Gn_let)), 50)
  635. print('graphs chosen:', idx_rdm)
  636. Gn_median = [Gn_let[idx].copy() for idx in idx_rdm]
  637. Gn_candidate = [g.copy() for g in Gn_median]
  638. alpha_range = [1 / len(Gn_median)] * len(Gn_median)
  639. time0 = time.time()
  640. ghat_new_list, sod_min, sod_set_median = iam_upgraded(Gn_median,
  641. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  642. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  643. params_ged=params_ged)
  644. time_total = time.time() - time0
  645. print('\ntime: ', time_total)
  646. time_list[-1].append(time_total)
  647. g_best[-1].append(ghat_new_list[0])
  648. sod_set_median_list[-1].append(sod_set_median)
  649. print('\nsmallest sod of the set median:', sod_set_median)
  650. sod_gs_list[-1].append(sod_min)
  651. print('\nsmallest sod in graph space:', sod_min)
  652. # show the best graph and save it to file.
  653. print('one of the possible corresponding pre-images is')
  654. draw_Letter_graph(ghat_new_list[0], savepath='results/iam/paper_compare/')
  655. # compute distance between \psi and the new generated graphs.
  656. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
  657. dhat_new_list = []
  658. for idx, g_tmp in enumerate(ghat_new_list):
  659. # @todo: the term3 below could use the one at the beginning of the function.
  660. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
  661. len(ghat_new_list) + len(Gn_median) + 1),
  662. alpha_range, knew, withterm3=False))
  663. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  664. dis_ks_min_list[-1].append(dhat_new_list[0])
  665. print('\nsods of the set median for this letter:', sod_set_median_list[-1])
  666. print('\nsods in graph space for this letter:', sod_gs_list[-1])
  667. print('\nsmallest distances in kernel space for this letter:',
  668. dis_ks_min_list[-1])
  669. print('\ntimes for this letter:', time_list[-1])
  670. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  671. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  672. dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  673. time_list[-1] = np.mean(time_list[-1])
  674. print('\nmean sods of the set median for each letter:', sod_set_median_list)
  675. print('\nmean sods in graph space for each letter:', sod_gs_list)
  676. print('\nmean smallest distances in kernel space for each letter:',
  677. dis_ks_min_list)
  678. print('\nmean times for each letter:', time_list)
  679. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  680. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  681. print('\nmean smallest distances in kernel space of all:',
  682. np.mean(dis_ks_min_list))
  683. print('\nmean times of all:', np.mean(time_list))
  684. def test_iam_fitdistance():
  685. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  686. 'extra_params': {}} # node/edge symb
  687. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  688. # Gn = Gn[0:50]
  689. # remove_edges(Gn)
  690. gkernel = 'marginalizedkernel'
  691. node_label = 'atom'
  692. edge_label = 'bond_type'
  693. # lmbda = 0.03 # termination probalility
  694. # # parameters for GED function
  695. # c_vi = 0.037
  696. # c_vr = 0.038
  697. # c_vs = 0.075
  698. # c_ei = 0.001
  699. # c_er = 0.001
  700. # c_es = 0.0
  701. # ite_max_iam = 50
  702. # epsilon_iam = 0.001
  703. # removeNodes = False
  704. # connected_iam = False
  705. # # parameters for IAM function
  706. # ged_cost = 'CONSTANT'
  707. # ged_method = 'IPFP'
  708. # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  709. # ged_stabilizer = 'min'
  710. # ged_repeat = 50
  711. # params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  712. # 'edit_cost_constant': edit_cost_constant,
  713. # 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  714. # parameters for GED function
  715. c_vi = 4
  716. c_vr = 4
  717. c_vs = 2
  718. c_ei = 1
  719. c_er = 1
  720. c_es = 1
  721. ite_max_iam = 50
  722. epsilon_iam = 0.001
  723. removeNodes = False
  724. connected_iam = False
  725. # parameters for IAM function
  726. ged_cost = 'CHEM_1'
  727. ged_method = 'IPFP'
  728. edit_cost_constant = []
  729. ged_stabilizer = 'min'
  730. ged_repeat = 50
  731. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  732. 'edit_cost_constant': edit_cost_constant,
  733. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  734. # find out all the graphs classified to positive group 1.
  735. idx_dict = get_same_item_indices(y_all)
  736. Gn = [Gn[i] for i in idx_dict[1]]
  737. # number of graphs; we what to compute the median of these graphs.
  738. # nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
  739. nb_median_range = [10]
  740. # # compute Gram matrix.
  741. # time0 = time.time()
  742. # km = compute_kernel(Gn, gkernel, True)
  743. # time_km = time.time() - time0
  744. # # write Gram matrix to file.
  745. # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
  746. time_list = []
  747. dis_ks_min_list = []
  748. dis_ks_gen_median_list = []
  749. sod_gs_list = []
  750. # sod_gs_min_list = []
  751. # nb_updated_list = []
  752. # nb_updated_k_list = []
  753. g_best = []
  754. for nb_median in nb_median_range:
  755. print('\n-------------------------------------------------------')
  756. print('number of median graphs =', nb_median)
  757. random.seed(1)
  758. idx_rdm = random.sample(range(len(Gn)), nb_median)
  759. print('graphs chosen:', idx_rdm)
  760. Gn_median = [Gn[idx].copy() for idx in idx_rdm]
  761. Gn_candidate = [g.copy() for g in Gn_median]
  762. # for g in Gn_median:
  763. # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
  764. ## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
  765. # plt.show()
  766. # plt.clf()
  767. ###################################################################
  768. # gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
  769. # km_tmp = gmfile['gm']
  770. # time_km = gmfile['gmtime']
  771. # # modify mixed gram matrix.
  772. # km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
  773. # for i in range(len(Gn)):
  774. # for j in range(i, len(Gn)):
  775. # km[i, j] = km_tmp[i, j]
  776. # km[j, i] = km[i, j]
  777. # for i in range(len(Gn)):
  778. # for j, idx in enumerate(idx_rdm):
  779. # km[i, len(Gn) + j] = km[i, idx]
  780. # km[len(Gn) + j, i] = km[i, idx]
  781. # for i, idx1 in enumerate(idx_rdm):
  782. # for j, idx2 in enumerate(idx_rdm):
  783. # km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
  784. ###################################################################
  785. alpha_range = [1 / nb_median] * nb_median
  786. time0 = time.time()
  787. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  788. = iam_upgraded(Gn_median, Gn_candidate,
  789. c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  790. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  791. params_ged=params_ged)
  792. time_total = time.time() - time0
  793. print('\ntime: ', time_total)
  794. time_list.append(time_total)
  795. # compute distance between \psi and the new generated graphs.
  796. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
  797. edge_label, False)
  798. dhat_new_list = []
  799. for idx, g_tmp in enumerate(G_gen_median_list):
  800. # @todo: the term3 below could use the one at the beginning of the function.
  801. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
  802. len(G_gen_median_list) + len(Gn_median) + 1),
  803. alpha_range, knew, withterm3=False))
  804. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  805. dis_ks_min_list.append(dhat_new_list[0])
  806. g_best.append(G_gen_median_list[0])
  807. # show the best graph and save it to file.
  808. # print('the shortest distance is', dhat)
  809. print('one of the possible corresponding pre-images is')
  810. nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  811. with_labels=True)
  812. plt.show()
  813. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  814. # plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
  815. # '.png', format="PNG")
  816. plt.clf()
  817. # print(ghat_list[0].nodes(data=True))
  818. # print(ghat_list[0].edges(data=True))
  819. sod_gs_list.append(sod_gen_median)
  820. # sod_gs_min_list.append(np.min(sod_gen_median))
  821. print('\nsmallest sod in graph space: ', sod_gen_median)
  822. print('\nsmallest sod of set median in graph space: ', sod_set_median)
  823. print('\nsods in graph space: ', sod_gs_list)
  824. # print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
  825. print('\nsmallest distance in kernel space for each set of median graphs: ',
  826. dis_ks_min_list)
  827. # print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
  828. # nb_updated_list)
  829. # print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
  830. # nb_updated_k_list)
  831. print('\ntimes:', time_list)
###############################################################################
# Script entry point. Only the uncommented call at the bottom runs when this
# file is executed directly; the commented-out calls are the other test
# functions that are currently switched off. To run a different one, comment
# out the active call and uncomment the one you want.
if __name__ == '__main__':
###############################################################################
# tests on different numbers of median-sets.
#    test_iam_median_nb()
#    test_iam_letter_h()
#    test_iam_monoterpenoides()
#    test_iam_mutag()
#    test_iam_fitdistance()
#    print("test log")
    # Currently selected test.
    test_iam_monoterpenoides_with_init40()

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.