You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_iam.py 40 kB

5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Sep 5 15:59:00 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import networkx as nx
  9. import matplotlib.pyplot as plt
  10. import time
  11. import random
  12. #from tqdm import tqdm
  13. from gklearn.utils.graphfiles import loadDataset
  14. #from gklearn.utils.logger2file import *
  15. from gklearn.preimage.iam import iam_upgraded
  16. from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar
  17. #from gklearn.preimage.ged import ged_median
  18. def test_iam_monoterpenoides_with_init40():
  19. gkernel = 'untilhpathkernel'
  20. node_label = 'atom'
  21. edge_label = 'bond_type'
  22. # unfitted edit costs.
  23. c_vi = 3
  24. c_vr = 3
  25. c_vs = 1
  26. c_ei = 3
  27. c_er = 3
  28. c_es = 1
  29. ite_max_iam = 50
  30. epsilon_iam = 0.0001
  31. removeNodes = False
  32. connected_iam = False
  33. # parameters for IAM function
  34. # ged_cost = 'CONSTANT'
  35. ged_cost = 'CONSTANT'
  36. ged_method = 'IPFP'
  37. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  38. ged_stabilizer = None
  39. # ged_repeat = 50
  40. algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
  41. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  42. 'edit_cost_constant': edit_cost_constant,
  43. 'algo_options': algo_options,
  44. 'stabilizer': ged_stabilizer}
  45. collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
  46. graph_dir = collection_path + 'gxl/'
  47. y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
  48. repeats = 50
  49. # classify graphs according to classes.
  50. time_list = []
  51. dis_ks_min_list = []
  52. dis_ks_set_median_list = []
  53. sod_gs_list = []
  54. g_best = []
  55. sod_set_median_list = []
  56. sod_list_list = []
  57. for y in y_all:
  58. print('\n-------------------------------------------------------')
  59. print('class of y:', y)
  60. time_list.append([])
  61. dis_ks_min_list.append([])
  62. dis_ks_set_median_list.append([])
  63. sod_gs_list.append([])
  64. g_best.append([])
  65. sod_set_median_list.append([])
  66. for repeat in range(repeats):
  67. # load median set.
  68. collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
  69. Gn_median, _ = loadDataset(collection_file, extra_params=graph_dir)
  70. Gn_candidate = [g.copy() for g in Gn_median]
  71. time0 = time.time()
  72. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  73. = iam_upgraded(Gn_median,
  74. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  75. epsilon=epsilon_iam, node_label=node_label, edge_label=edge_label,
  76. connected=connected_iam, removeNodes=removeNodes,
  77. params_ged=params_ged)
  78. time_total = time.time() - time0
  79. print('\ntime: ', time_total)
  80. time_list[-1].append(time_total)
  81. g_best[-1].append(G_gen_median_list[0])
  82. sod_set_median_list[-1].append(sod_set_median)
  83. print('\nsmallest sod of the set median:', sod_set_median)
  84. sod_gs_list[-1].append(sod_gen_median)
  85. print('\nsmallest sod in graph space:', sod_gen_median)
  86. sod_list_list.append(sod_list)
  87. # # show the best graph and save it to file.
  88. # print('one of the possible corresponding pre-images is')
  89. # nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  90. # with_labels=True)
  91. ## plt.show()
  92. # # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  93. ## plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) +
  94. ## '_repeat' + str(repeat) + '_' + str(time.time()) +
  95. ## '.png', format="PNG")
  96. # plt.clf()
  97. # # print(G_gen_median_list[0].nodes(data=True))
  98. # # print(G_gen_median_list[0].edges(data=True))
  99. print('\nsods of the set median for this class:', sod_set_median_list[-1])
  100. print('\nsods in graph space for this class:', sod_gs_list[-1])
  101. # print('\ndistance in kernel space of set median for this class:',
  102. # dis_ks_set_median_list[-1])
  103. # print('\nsmallest distances in kernel space for this class:',
  104. # dis_ks_min_list[-1])
  105. print('\ntimes for this class:', time_list[-1])
  106. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  107. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  108. # dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
  109. # dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  110. time_list[-1] = np.mean(time_list[-1])
  111. print()
  112. print('\nmean sods of the set median for each class:', sod_set_median_list)
  113. print('\nmean sods in graph space for each class:', sod_gs_list)
  114. # print('\ndistances in kernel space of set median for each class:',
  115. # dis_ks_set_median_list)
  116. # print('\nmean smallest distances in kernel space for each class:',
  117. # dis_ks_min_list)
  118. print('\nmean times for each class:', time_list)
  119. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  120. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  121. # print('\nmean distances in kernel space of set median of all:',
  122. # np.mean(dis_ks_set_median_list))
  123. # print('\nmean smallest distances in kernel space of all:',
  124. # np.mean(dis_ks_min_list))
  125. print('\nmean times of all:', np.mean(time_list))
  126. def test_iam_monoterpenoides():
  127. ds = {'name': 'monoterpenoides',
  128. 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
  129. Gn, y_all = loadDataset(ds['dataset'])
  130. # Gn = Gn[0:50]
  131. gkernel = 'untilhpathkernel'
  132. node_label = 'atom'
  133. edge_label = 'bond_type'
  134. # parameters for GED function from the IAM paper.
  135. # fitted edit costs (Gaussian).
  136. c_vi = 0.03620133402089074
  137. c_vr = 0.0417574590207099
  138. c_vs = 0.009992282328587499
  139. c_ei = 0.08293120042342755
  140. c_er = 0.09512220476358019
  141. c_es = 0.09222529696841467
  142. # # fitted edit costs (linear combinations).
  143. # c_vi = 0.1749684054238749
  144. # c_vr = 0.0734054228711457
  145. # c_vs = 0.05017781726016715
  146. # c_ei = 0.1869431164806936
  147. # c_er = 0.32055856948274
  148. # c_es = 0.2569469379247611
  149. # # unfitted edit costs.
  150. # c_vi = 3
  151. # c_vr = 3
  152. # c_vs = 1
  153. # c_ei = 3
  154. # c_er = 3
  155. # c_es = 1
  156. ite_max_iam = 50
  157. epsilon_iam = 0.001
  158. removeNodes = False
  159. connected_iam = False
  160. # parameters for IAM function
  161. # ged_cost = 'CONSTANT'
  162. ged_cost = 'CONSTANT'
  163. ged_method = 'IPFP'
  164. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  165. # edit_cost_constant = []
  166. ged_stabilizer = 'min'
  167. ged_repeat = 50
  168. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  169. 'edit_cost_constant': edit_cost_constant,
  170. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  171. # classify graphs according to letters.
  172. time_list = []
  173. dis_ks_min_list = []
  174. dis_ks_set_median_list = []
  175. sod_gs_list = []
  176. g_best = []
  177. sod_set_median_list = []
  178. sod_list_list = []
  179. idx_dict = get_same_item_indices(y_all)
  180. for y_class in idx_dict:
  181. print('\n-------------------------------------------------------')
  182. print('class of y:', y_class)
  183. Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
  184. time_list.append([])
  185. dis_ks_min_list.append([])
  186. dis_ks_set_median_list.append([])
  187. sod_gs_list.append([])
  188. g_best.append([])
  189. sod_set_median_list.append([])
  190. for repeat in range(50):
  191. idx_rdm = random.sample(range(len(Gn_class)), 10)
  192. print('graphs chosen:', idx_rdm)
  193. Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
  194. Gn_candidate = [g.copy() for g in Gn_median]
  195. alpha_range = [1 / len(Gn_median)] * len(Gn_median)
  196. time0 = time.time()
  197. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  198. = iam_upgraded(Gn_median,
  199. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  200. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  201. params_ged=params_ged)
  202. time_total = time.time() - time0
  203. print('\ntime: ', time_total)
  204. time_list[-1].append(time_total)
  205. g_best[-1].append(G_gen_median_list[0])
  206. sod_set_median_list[-1].append(sod_set_median)
  207. print('\nsmallest sod of the set median:', sod_set_median)
  208. sod_gs_list[-1].append(sod_gen_median)
  209. print('\nsmallest sod in graph space:', sod_gen_median)
  210. sod_list_list.append(sod_list)
  211. # show the best graph and save it to file.
  212. print('one of the possible corresponding pre-images is')
  213. nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  214. with_labels=True)
  215. # plt.show()
  216. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  217. # plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) +
  218. # '_repeat' + str(repeat) + '_' + str(time.time()) +
  219. # '.png', format="PNG")
  220. plt.clf()
  221. # print(G_gen_median_list[0].nodes(data=True))
  222. # print(G_gen_median_list[0].edges(data=True))
  223. # compute distance between \psi and the set median graph.
  224. knew_set_median = compute_kernel(G_set_median_list + Gn_median,
  225. gkernel, node_label, edge_label, False)
  226. dhat_new_set_median_list = []
  227. for idx, g_tmp in enumerate(G_set_median_list):
  228. # @todo: the term3 below could use the one at the beginning of the function.
  229. dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list),
  230. len(G_set_median_list) + len(Gn_median) + 1),
  231. alpha_range, knew_set_median, withterm3=False))
  232. print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0])
  233. dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
  234. # compute distance between \psi and the new generated graphs.
  235. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
  236. edge_label, False)
  237. dhat_new_list = []
  238. for idx, g_tmp in enumerate(G_gen_median_list):
  239. # @todo: the term3 below could use the one at the beginning of the function.
  240. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
  241. len(G_gen_median_list) + len(Gn_median) + 1),
  242. alpha_range, knew, withterm3=False))
  243. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  244. dis_ks_min_list[-1].append(dhat_new_list[0])
  245. print('\nsods of the set median for this class:', sod_set_median_list[-1])
  246. print('\nsods in graph space for this class:', sod_gs_list[-1])
  247. print('\ndistance in kernel space of set median for this class:',
  248. dis_ks_set_median_list[-1])
  249. print('\nsmallest distances in kernel space for this class:',
  250. dis_ks_min_list[-1])
  251. print('\ntimes for this class:', time_list[-1])
  252. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  253. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  254. dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
  255. dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  256. time_list[-1] = np.mean(time_list[-1])
  257. print()
  258. print('\nmean sods of the set median for each class:', sod_set_median_list)
  259. print('\nmean sods in graph space for each class:', sod_gs_list)
  260. print('\ndistances in kernel space of set median for each class:',
  261. dis_ks_set_median_list)
  262. print('\nmean smallest distances in kernel space for each class:',
  263. dis_ks_min_list)
  264. print('\nmean times for each class:', time_list)
  265. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  266. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  267. print('\nmean distances in kernel space of set median of all:',
  268. np.mean(dis_ks_set_median_list))
  269. print('\nmean smallest distances in kernel space of all:',
  270. np.mean(dis_ks_min_list))
  271. print('\nmean times of all:', np.mean(time_list))
  272. nb_better_sods = 0
  273. nb_worse_sods = 0
  274. nb_same_sods = 0
  275. for sods in sod_list_list:
  276. if sods[0] > sods[-1]:
  277. nb_better_sods += 1
  278. elif sods[0] < sods[-1]:
  279. nb_worse_sods += 1
  280. else:
  281. nb_same_sods += 1
  282. print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
  283. 'are getting better,', str(nb_worse_sods), 'are getting worse,',
  284. str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)),
  285. 'sods are improved.')
  286. def test_iam_mutag():
  287. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  288. 'extra_params': {}} # node/edge symb
  289. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  290. # Gn = Gn[0:50]
  291. gkernel = 'untilhpathkernel'
  292. node_label = 'atom'
  293. edge_label = 'bond_type'
  294. # parameters for GED function from the IAM paper.
  295. # fitted edit costs.
  296. c_vi = 0.03523843108436513
  297. c_vr = 0.03347339739350128
  298. c_vs = 0.06871290673612238
  299. c_ei = 0.08591999846720685
  300. c_er = 0.07962086440894103
  301. c_es = 0.08596855855478233
  302. # unfitted edit costs.
  303. # c_vi = 3
  304. # c_vr = 3
  305. # c_vs = 1
  306. # c_ei = 3
  307. # c_er = 3
  308. # c_es = 1
  309. ite_max_iam = 50
  310. epsilon_iam = 0.001
  311. removeNodes = False
  312. connected_iam = False
  313. # parameters for IAM function
  314. # ged_cost = 'CONSTANT'
  315. ged_cost = 'CONSTANT'
  316. ged_method = 'IPFP'
  317. edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  318. # edit_cost_constant = []
  319. ged_stabilizer = 'min'
  320. ged_repeat = 50
  321. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  322. 'edit_cost_constant': edit_cost_constant,
  323. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  324. # classify graphs according to letters.
  325. time_list = []
  326. dis_ks_min_list = []
  327. dis_ks_set_median_list = []
  328. sod_gs_list = []
  329. g_best = []
  330. sod_set_median_list = []
  331. sod_list_list = []
  332. idx_dict = get_same_item_indices(y_all)
  333. for y_class in idx_dict:
  334. print('\n-------------------------------------------------------')
  335. print('class of y:', y_class)
  336. Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
  337. time_list.append([])
  338. dis_ks_min_list.append([])
  339. dis_ks_set_median_list.append([])
  340. sod_gs_list.append([])
  341. g_best.append([])
  342. sod_set_median_list.append([])
  343. for repeat in range(50):
  344. idx_rdm = random.sample(range(len(Gn_class)), 10)
  345. print('graphs chosen:', idx_rdm)
  346. Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
  347. Gn_candidate = [g.copy() for g in Gn_median]
  348. alpha_range = [1 / len(Gn_median)] * len(Gn_median)
  349. time0 = time.time()
  350. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  351. = iam_upgraded(Gn_median,
  352. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  353. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  354. params_ged=params_ged)
  355. time_total = time.time() - time0
  356. print('\ntime: ', time_total)
  357. time_list[-1].append(time_total)
  358. g_best[-1].append(G_gen_median_list[0])
  359. sod_set_median_list[-1].append(sod_set_median)
  360. print('\nsmallest sod of the set median:', sod_set_median)
  361. sod_gs_list[-1].append(sod_gen_median)
  362. print('\nsmallest sod in graph space:', sod_gen_median)
  363. sod_list_list.append(sod_list)
  364. # show the best graph and save it to file.
  365. print('one of the possible corresponding pre-images is')
  366. nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  367. with_labels=True)
  368. # plt.show()
  369. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  370. # plt.savefig('results/iam/paper_compare/mutag_y' + str(y_class) +
  371. # '_repeat' + str(repeat) + '_' + str(time.time()) +
  372. # '.png', format="PNG")
  373. plt.clf()
  374. # print(G_gen_median_list[0].nodes(data=True))
  375. # print(G_gen_median_list[0].edges(data=True))
  376. # compute distance between \psi and the set median graph.
  377. knew_set_median = compute_kernel(G_set_median_list + Gn_median,
  378. gkernel, node_label, edge_label, False)
  379. dhat_new_set_median_list = []
  380. for idx, g_tmp in enumerate(G_set_median_list):
  381. # @todo: the term3 below could use the one at the beginning of the function.
  382. dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list),
  383. len(G_set_median_list) + len(Gn_median) + 1),
  384. alpha_range, knew_set_median, withterm3=False))
  385. print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0])
  386. dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
  387. # compute distance between \psi and the new generated graphs.
  388. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
  389. edge_label, False)
  390. dhat_new_list = []
  391. for idx, g_tmp in enumerate(G_gen_median_list):
  392. # @todo: the term3 below could use the one at the beginning of the function.
  393. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
  394. len(G_gen_median_list) + len(Gn_median) + 1),
  395. alpha_range, knew, withterm3=False))
  396. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  397. dis_ks_min_list[-1].append(dhat_new_list[0])
  398. print('\nsods of the set median for this class:', sod_set_median_list[-1])
  399. print('\nsods in graph space for this class:', sod_gs_list[-1])
  400. print('\ndistance in kernel space of set median for this class:',
  401. dis_ks_set_median_list[-1])
  402. print('\nsmallest distances in kernel space for this class:',
  403. dis_ks_min_list[-1])
  404. print('\ntimes for this class:', time_list[-1])
  405. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  406. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  407. dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
  408. dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  409. time_list[-1] = np.mean(time_list[-1])
  410. print()
  411. print('\nmean sods of the set median for each class:', sod_set_median_list)
  412. print('\nmean sods in graph space for each class:', sod_gs_list)
  413. print('\ndistances in kernel space of set median for each class:',
  414. dis_ks_set_median_list)
  415. print('\nmean smallest distances in kernel space for each class:',
  416. dis_ks_min_list)
  417. print('\nmean times for each class:', time_list)
  418. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  419. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  420. print('\nmean distances in kernel space of set median of all:',
  421. np.mean(dis_ks_set_median_list))
  422. print('\nmean smallest distances in kernel space of all:',
  423. np.mean(dis_ks_min_list))
  424. print('\nmean times of all:', np.mean(time_list))
  425. nb_better_sods = 0
  426. nb_worse_sods = 0
  427. nb_same_sods = 0
  428. for sods in sod_list_list:
  429. if sods[0] > sods[-1]:
  430. nb_better_sods += 1
  431. elif sods[0] < sods[-1]:
  432. nb_worse_sods += 1
  433. else:
  434. nb_same_sods += 1
  435. print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods),
  436. 'are getting better,', str(nb_worse_sods), 'are getting worse,',
  437. str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)),
  438. 'sods are improved.')
  439. ###############################################################################
  440. # tests on different numbers of median-sets.
  441. def test_iam_median_nb():
  442. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  443. 'extra_params': {}} # node/edge symb
  444. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  445. # Gn = Gn[0:50]
  446. remove_edges(Gn)
  447. gkernel = 'marginalizedkernel'
  448. lmbda = 0.03 # termination probalility
  449. # # parameters for GED function
  450. # c_vi = 0.037
  451. # c_vr = 0.038
  452. # c_vs = 0.075
  453. # c_ei = 0.001
  454. # c_er = 0.001
  455. # c_es = 0.0
  456. # ite_max_iam = 50
  457. # epsilon_iam = 0.001
  458. # removeNodes = False
  459. # connected_iam = False
  460. # # parameters for IAM function
  461. # ged_cost = 'CONSTANT'
  462. # ged_method = 'IPFP'
  463. # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  464. # ged_stabilizer = 'min'
  465. # ged_repeat = 50
  466. # params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  467. # 'edit_cost_constant': edit_cost_constant,
  468. # 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  469. # parameters for GED function
  470. c_vi = 4
  471. c_vr = 4
  472. c_vs = 2
  473. c_ei = 1
  474. c_er = 1
  475. c_es = 1
  476. ite_max_iam = 50
  477. epsilon_iam = 0.001
  478. removeNodes = False
  479. connected_iam = False
  480. # parameters for IAM function
  481. ged_cost = 'CHEM_1'
  482. ged_method = 'IPFP'
  483. edit_cost_constant = []
  484. ged_stabilizer = 'min'
  485. ged_repeat = 50
  486. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  487. 'edit_cost_constant': edit_cost_constant,
  488. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  489. # find out all the graphs classified to positive group 1.
  490. idx_dict = get_same_item_indices(y_all)
  491. Gn = [Gn[i] for i in idx_dict[1]]
  492. # number of graphs; we what to compute the median of these graphs.
  493. # nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
  494. nb_median_range = [len(Gn)]
  495. # # compute Gram matrix.
  496. # time0 = time.time()
  497. # km = compute_kernel(Gn, gkernel, True)
  498. # time_km = time.time() - time0
  499. # # write Gram matrix to file.
  500. # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
  501. time_list = []
  502. dis_ks_min_list = []
  503. sod_gs_list = []
  504. # sod_gs_min_list = []
  505. # nb_updated_list = []
  506. # nb_updated_k_list = []
  507. g_best = []
  508. for nb_median in nb_median_range:
  509. print('\n-------------------------------------------------------')
  510. print('number of median graphs =', nb_median)
  511. random.seed(1)
  512. idx_rdm = random.sample(range(len(Gn)), nb_median)
  513. print('graphs chosen:', idx_rdm)
  514. Gn_median = [Gn[idx].copy() for idx in idx_rdm]
  515. Gn_candidate = [g.copy() for g in Gn]
  516. # for g in Gn_median:
  517. # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
  518. ## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
  519. # plt.show()
  520. # plt.clf()
  521. ###################################################################
  522. # gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
  523. # km_tmp = gmfile['gm']
  524. # time_km = gmfile['gmtime']
  525. # # modify mixed gram matrix.
  526. # km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
  527. # for i in range(len(Gn)):
  528. # for j in range(i, len(Gn)):
  529. # km[i, j] = km_tmp[i, j]
  530. # km[j, i] = km[i, j]
  531. # for i in range(len(Gn)):
  532. # for j, idx in enumerate(idx_rdm):
  533. # km[i, len(Gn) + j] = km[i, idx]
  534. # km[len(Gn) + j, i] = km[i, idx]
  535. # for i, idx1 in enumerate(idx_rdm):
  536. # for j, idx2 in enumerate(idx_rdm):
  537. # km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
  538. ###################################################################
  539. alpha_range = [1 / nb_median] * nb_median
  540. time0 = time.time()
  541. ghat_new_list, sod_min = iam_upgraded(Gn_median, Gn_candidate,
  542. c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  543. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  544. params_ged=params_ged)
  545. time_total = time.time() - time0
  546. print('\ntime: ', time_total)
  547. time_list.append(time_total)
  548. # compute distance between \psi and the new generated graphs.
  549. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
  550. dhat_new_list = []
  551. for idx, g_tmp in enumerate(ghat_new_list):
  552. # @todo: the term3 below could use the one at the beginning of the function.
  553. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
  554. len(ghat_new_list) + len(Gn_median) + 1),
  555. alpha_range, knew, withterm3=False))
  556. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  557. dis_ks_min_list.append(dhat_new_list[0])
  558. g_best.append(ghat_new_list[0])
  559. # show the best graph and save it to file.
  560. # print('the shortest distance is', dhat)
  561. print('one of the possible corresponding pre-images is')
  562. nx.draw(ghat_new_list[0], labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
  563. with_labels=True)
  564. plt.show()
  565. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  566. plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
  567. '.png', format="PNG")
  568. plt.clf()
  569. # print(ghat_list[0].nodes(data=True))
  570. # print(ghat_list[0].edges(data=True))
  571. sod_gs_list.append(sod_min)
  572. # sod_gs_min_list.append(np.min(sod_min))
  573. print('\nsmallest sod in graph space: ', sod_min)
  574. print('\nsods in graph space: ', sod_gs_list)
  575. # print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
  576. print('\nsmallest distance in kernel space for each set of median graphs: ',
  577. dis_ks_min_list)
  578. # print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
  579. # nb_updated_list)
  580. # print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
  581. # nb_updated_k_list)
  582. print('\ntimes:', time_list)
  583. def test_iam_letter_h():
  584. from median import draw_Letter_graph
  585. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  586. 'extra_params': {}} # node nsymb
  587. # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
  588. # 'extra_params': {}} # node nsymb
  589. # Gn = Gn[0:50]
  590. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  591. gkernel = 'structuralspkernel'
  592. # parameters for GED function from the IAM paper.
  593. c_vi = 3
  594. c_vr = 3
  595. c_vs = 1
  596. c_ei = 3
  597. c_er = 3
  598. c_es = 1
  599. ite_max_iam = 50
  600. epsilon_iam = 0.001
  601. removeNodes = False
  602. connected_iam = False
  603. # parameters for IAM function
  604. # ged_cost = 'CONSTANT'
  605. ged_cost = 'LETTER'
  606. ged_method = 'IPFP'
  607. # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  608. edit_cost_constant = []
  609. ged_stabilizer = 'min'
  610. ged_repeat = 50
  611. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  612. 'edit_cost_constant': edit_cost_constant,
  613. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  614. # classify graphs according to letters.
  615. time_list = []
  616. dis_ks_min_list = []
  617. sod_gs_list = []
  618. g_best = []
  619. sod_set_median_list = []
  620. idx_dict = get_same_item_indices(y_all)
  621. for letter in idx_dict:
  622. print('\n-------------------------------------------------------')
  623. print('letter', letter)
  624. Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
  625. time_list.append([])
  626. dis_ks_min_list.append([])
  627. sod_gs_list.append([])
  628. g_best.append([])
  629. sod_set_median_list.append([])
  630. for repeat in range(50):
  631. idx_rdm = random.sample(range(len(Gn_let)), 50)
  632. print('graphs chosen:', idx_rdm)
  633. Gn_median = [Gn_let[idx].copy() for idx in idx_rdm]
  634. Gn_candidate = [g.copy() for g in Gn_median]
  635. alpha_range = [1 / len(Gn_median)] * len(Gn_median)
  636. time0 = time.time()
  637. ghat_new_list, sod_min, sod_set_median = iam_upgraded(Gn_median,
  638. Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  639. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  640. params_ged=params_ged)
  641. time_total = time.time() - time0
  642. print('\ntime: ', time_total)
  643. time_list[-1].append(time_total)
  644. g_best[-1].append(ghat_new_list[0])
  645. sod_set_median_list[-1].append(sod_set_median)
  646. print('\nsmallest sod of the set median:', sod_set_median)
  647. sod_gs_list[-1].append(sod_min)
  648. print('\nsmallest sod in graph space:', sod_min)
  649. # show the best graph and save it to file.
  650. print('one of the possible corresponding pre-images is')
  651. draw_Letter_graph(ghat_new_list[0], savepath='results/iam/paper_compare/')
  652. # compute distance between \psi and the new generated graphs.
  653. knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
  654. dhat_new_list = []
  655. for idx, g_tmp in enumerate(ghat_new_list):
  656. # @todo: the term3 below could use the one at the beginning of the function.
  657. dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list),
  658. len(ghat_new_list) + len(Gn_median) + 1),
  659. alpha_range, knew, withterm3=False))
  660. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  661. dis_ks_min_list[-1].append(dhat_new_list[0])
  662. print('\nsods of the set median for this letter:', sod_set_median_list[-1])
  663. print('\nsods in graph space for this letter:', sod_gs_list[-1])
  664. print('\nsmallest distances in kernel space for this letter:',
  665. dis_ks_min_list[-1])
  666. print('\ntimes for this letter:', time_list[-1])
  667. sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
  668. sod_gs_list[-1] = np.mean(sod_gs_list[-1])
  669. dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
  670. time_list[-1] = np.mean(time_list[-1])
  671. print('\nmean sods of the set median for each letter:', sod_set_median_list)
  672. print('\nmean sods in graph space for each letter:', sod_gs_list)
  673. print('\nmean smallest distances in kernel space for each letter:',
  674. dis_ks_min_list)
  675. print('\nmean times for each letter:', time_list)
  676. print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
  677. print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
  678. print('\nmean smallest distances in kernel space of all:',
  679. np.mean(dis_ks_min_list))
  680. print('\nmean times of all:', np.mean(time_list))
  681. def test_iam_fitdistance():
  682. ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  683. 'extra_params': {}} # node/edge symb
  684. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  685. # Gn = Gn[0:50]
  686. # remove_edges(Gn)
  687. gkernel = 'marginalizedkernel'
  688. node_label = 'atom'
  689. edge_label = 'bond_type'
  690. # lmbda = 0.03 # termination probalility
  691. # # parameters for GED function
  692. # c_vi = 0.037
  693. # c_vr = 0.038
  694. # c_vs = 0.075
  695. # c_ei = 0.001
  696. # c_er = 0.001
  697. # c_es = 0.0
  698. # ite_max_iam = 50
  699. # epsilon_iam = 0.001
  700. # removeNodes = False
  701. # connected_iam = False
  702. # # parameters for IAM function
  703. # ged_cost = 'CONSTANT'
  704. # ged_method = 'IPFP'
  705. # edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
  706. # ged_stabilizer = 'min'
  707. # ged_repeat = 50
  708. # params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  709. # 'edit_cost_constant': edit_cost_constant,
  710. # 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  711. # parameters for GED function
  712. c_vi = 4
  713. c_vr = 4
  714. c_vs = 2
  715. c_ei = 1
  716. c_er = 1
  717. c_es = 1
  718. ite_max_iam = 50
  719. epsilon_iam = 0.001
  720. removeNodes = False
  721. connected_iam = False
  722. # parameters for IAM function
  723. ged_cost = 'CHEM_1'
  724. ged_method = 'IPFP'
  725. edit_cost_constant = []
  726. ged_stabilizer = 'min'
  727. ged_repeat = 50
  728. params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method,
  729. 'edit_cost_constant': edit_cost_constant,
  730. 'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
  731. # find out all the graphs classified to positive group 1.
  732. idx_dict = get_same_item_indices(y_all)
  733. Gn = [Gn[i] for i in idx_dict[1]]
  734. # number of graphs; we what to compute the median of these graphs.
  735. # nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
  736. nb_median_range = [10]
  737. # # compute Gram matrix.
  738. # time0 = time.time()
  739. # km = compute_kernel(Gn, gkernel, True)
  740. # time_km = time.time() - time0
  741. # # write Gram matrix to file.
  742. # np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
  743. time_list = []
  744. dis_ks_min_list = []
  745. dis_ks_gen_median_list = []
  746. sod_gs_list = []
  747. # sod_gs_min_list = []
  748. # nb_updated_list = []
  749. # nb_updated_k_list = []
  750. g_best = []
  751. for nb_median in nb_median_range:
  752. print('\n-------------------------------------------------------')
  753. print('number of median graphs =', nb_median)
  754. random.seed(1)
  755. idx_rdm = random.sample(range(len(Gn)), nb_median)
  756. print('graphs chosen:', idx_rdm)
  757. Gn_median = [Gn[idx].copy() for idx in idx_rdm]
  758. Gn_candidate = [g.copy() for g in Gn_median]
  759. # for g in Gn_median:
  760. # nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
  761. ## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
  762. # plt.show()
  763. # plt.clf()
  764. ###################################################################
  765. # gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
  766. # km_tmp = gmfile['gm']
  767. # time_km = gmfile['gmtime']
  768. # # modify mixed gram matrix.
  769. # km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
  770. # for i in range(len(Gn)):
  771. # for j in range(i, len(Gn)):
  772. # km[i, j] = km_tmp[i, j]
  773. # km[j, i] = km[i, j]
  774. # for i in range(len(Gn)):
  775. # for j, idx in enumerate(idx_rdm):
  776. # km[i, len(Gn) + j] = km[i, idx]
  777. # km[len(Gn) + j, i] = km[i, idx]
  778. # for i, idx1 in enumerate(idx_rdm):
  779. # for j, idx2 in enumerate(idx_rdm):
  780. # km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
  781. ###################################################################
  782. alpha_range = [1 / nb_median] * nb_median
  783. time0 = time.time()
  784. G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
  785. = iam_upgraded(Gn_median, Gn_candidate,
  786. c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
  787. epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes,
  788. params_ged=params_ged)
  789. time_total = time.time() - time0
  790. print('\ntime: ', time_total)
  791. time_list.append(time_total)
  792. # compute distance between \psi and the new generated graphs.
  793. knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
  794. edge_label, False)
  795. dhat_new_list = []
  796. for idx, g_tmp in enumerate(G_gen_median_list):
  797. # @todo: the term3 below could use the one at the beginning of the function.
  798. dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list),
  799. len(G_gen_median_list) + len(Gn_median) + 1),
  800. alpha_range, knew, withterm3=False))
  801. print('\nsmallest distance in kernel space: ', dhat_new_list[0])
  802. dis_ks_min_list.append(dhat_new_list[0])
  803. g_best.append(G_gen_median_list[0])
  804. # show the best graph and save it to file.
  805. # print('the shortest distance is', dhat)
  806. print('one of the possible corresponding pre-images is')
  807. nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'),
  808. with_labels=True)
  809. plt.show()
  810. # plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) +
  811. # plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) +
  812. # '.png', format="PNG")
  813. plt.clf()
  814. # print(ghat_list[0].nodes(data=True))
  815. # print(ghat_list[0].edges(data=True))
  816. sod_gs_list.append(sod_gen_median)
  817. # sod_gs_min_list.append(np.min(sod_gen_median))
  818. print('\nsmallest sod in graph space: ', sod_gen_median)
  819. print('\nsmallest sod of set median in graph space: ', sod_set_median)
  820. print('\nsods in graph space: ', sod_gs_list)
  821. # print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
  822. print('\nsmallest distance in kernel space for each set of median graphs: ',
  823. dis_ks_min_list)
  824. # print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
  825. # nb_updated_list)
  826. # print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
  827. # nb_updated_k_list)
  828. print('\ntimes:', time_list)
  829. ###############################################################################
  830. if __name__ == '__main__':
  831. ###############################################################################
  832. # tests on different numbers of median-sets.
  833. # test_iam_median_nb()
  834. # test_iam_letter_h()
  835. # test_iam_monoterpenoides()
  836. # test_iam_mutag()
  837. # test_iam_fitdistance()
  838. # print("test log")
  839. test_iam_monoterpenoides_with_init40()

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.