You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

run_gk_iam.py 28 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Jul 4 12:20:16 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import networkx as nx
  9. import matplotlib.pyplot as plt
  10. import time
  11. from tqdm import tqdm
  12. import sys
  13. sys.path.insert(0, "../")
  14. from pygraph.utils.graphfiles import loadDataset
  15. from median import draw_Letter_graph
  16. # --------------------------- These are tests --------------------------------#
  17. def test_who_is_the_closest_in_kernel_space(Gn):
  18. idx_gi = [0, 6]
  19. g1 = Gn[idx_gi[0]]
  20. g2 = Gn[idx_gi[1]]
  21. # create the "median" graph.
  22. gnew = g2.copy()
  23. gnew.remove_node(0)
  24. nx.draw_networkx(gnew)
  25. plt.show()
  26. print(gnew.nodes(data=True))
  27. Gn = [gnew] + Gn
  28. # compute gram matrix
  29. Kmatrix = compute_kernel(Gn, 'untilhpathkernel', True)
  30. # the distance matrix
  31. dmatrix = gram2distances(Kmatrix)
  32. print(np.sort(dmatrix[idx_gi[0] + 1]))
  33. print(np.argsort(dmatrix[idx_gi[0] + 1]))
  34. print(np.sort(dmatrix[idx_gi[1] + 1]))
  35. print(np.argsort(dmatrix[idx_gi[1] + 1]))
  36. # for all g in Gn, compute (d(g1, g) + d(g2, g)) / 2
  37. dis_median = [(dmatrix[i, idx_gi[0] + 1] + dmatrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
  38. print(np.sort(dis_median))
  39. print(np.argsort(dis_median))
  40. return
  41. def test_who_is_the_closest_in_GED_space(Gn):
  42. from iam import GED
  43. idx_gi = [0, 6]
  44. g1 = Gn[idx_gi[0]]
  45. g2 = Gn[idx_gi[1]]
  46. # create the "median" graph.
  47. gnew = g2.copy()
  48. gnew.remove_node(0)
  49. nx.draw_networkx(gnew)
  50. plt.show()
  51. print(gnew.nodes(data=True))
  52. Gn = [gnew] + Gn
  53. # compute GEDs
  54. ged_matrix = np.zeros((len(Gn), len(Gn)))
  55. for i1 in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
  56. for i2 in range(len(Gn)):
  57. dis, _, _ = GED(Gn[i1], Gn[i2], lib='gedlib')
  58. ged_matrix[i1, i2] = dis
  59. print(np.sort(ged_matrix[idx_gi[0] + 1]))
  60. print(np.argsort(ged_matrix[idx_gi[0] + 1]))
  61. print(np.sort(ged_matrix[idx_gi[1] + 1]))
  62. print(np.argsort(ged_matrix[idx_gi[1] + 1]))
  63. # for all g in Gn, compute (GED(g1, g) + GED(g2, g)) / 2
  64. dis_median = [(ged_matrix[i, idx_gi[0] + 1] + ged_matrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
  65. print(np.sort(dis_median))
  66. print(np.argsort(dis_median))
  67. return
  68. def test_will_IAM_give_the_median_graph_we_wanted(Gn):
  69. idx_gi = [0, 6]
  70. g1 = Gn[idx_gi[0]].copy()
  71. g2 = Gn[idx_gi[1]].copy()
  72. # del Gn[idx_gi[0]]
  73. # del Gn[idx_gi[1] - 1]
  74. g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
  75. # g_median = test_iam_with_more_graphs_as_init(Gn, Gn, c_ei=1, c_er=1, c_es=1)
  76. nx.draw_networkx(g_median)
  77. plt.show()
  78. print(g_median.nodes(data=True))
  79. print(g_median.edges(data=True))
  80. def test_new_IAM_allGraph_deleteNodes(Gn):
  81. idx_gi = [0, 6]
  82. # g1 = Gn[idx_gi[0]].copy()
  83. # g2 = Gn[idx_gi[1]].copy()
  84. # g1 = nx.Graph(name='haha')
  85. # g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
  86. # g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
  87. # g2 = nx.Graph(name='hahaha')
  88. # g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
  89. # (3, {'atom': 'O'}), (4, {'atom': 'C'})])
  90. # g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
  91. # (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
  92. g1 = nx.Graph(name='haha')
  93. g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
  94. (3, {'atom': 'S'}), (4, {'atom': 'S'})])
  95. g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
  96. (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
  97. g2 = nx.Graph(name='hahaha')
  98. g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
  99. (3, {'atom': 'O'}), (4, {'atom': 'O'})])
  100. g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
  101. (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
  102. # g2 = g1.copy()
  103. # g2.add_nodes_from([(3, {'atom': 'O'})])
  104. # g2.add_nodes_from([(4, {'atom': 'C'})])
  105. # g2.add_edges_from([(1, 3, {'bond_type': '1'})])
  106. # g2.add_edges_from([(3, 4, {'bond_type': '1'})])
  107. # del Gn[idx_gi[0]]
  108. # del Gn[idx_gi[1] - 1]
  109. nx.draw_networkx(g1)
  110. plt.show()
  111. print(g1.nodes(data=True))
  112. print(g1.edges(data=True))
  113. nx.draw_networkx(g2)
  114. plt.show()
  115. print(g2.nodes(data=True))
  116. print(g2.edges(data=True))
  117. g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
  118. # g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(Gn, Gn, c_ei=1, c_er=1, c_es=1)
  119. nx.draw_networkx(g_median)
  120. plt.show()
  121. print(g_median.nodes(data=True))
  122. print(g_median.edges(data=True))
  123. def test_the_simple_two(Gn, gkernel):
  124. from gk_iam import gk_iam_nearest_multi, compute_kernel
  125. lmbda = 0.03 # termination probalility
  126. r_max = 10 # recursions
  127. l = 500
  128. alpha_range = np.linspace(0.5, 0.5, 1)
  129. k = 2 # k nearest neighbors
  130. # randomly select two molecules
  131. np.random.seed(1)
  132. idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
  133. g1 = Gn[idx_gi[0]]
  134. g2 = Gn[idx_gi[1]]
  135. Gn_mix = [g.copy() for g in Gn]
  136. Gn_mix.append(g1.copy())
  137. Gn_mix.append(g2.copy())
  138. # g_tmp = iam([g1, g2])
  139. # nx.draw_networkx(g_tmp)
  140. # plt.show()
  141. # compute
  142. # k_list = [] # kernel between each graph and itself.
  143. # k_g1_list = [] # kernel between each graph and g1
  144. # k_g2_list = [] # kernel between each graph and g2
  145. # for ig, g in tqdm(enumerate(Gn), desc='computing self kernels', file=sys.stdout):
  146. # ktemp = compute_kernel([g, g1, g2], 'marginalizedkernel', False)
  147. # k_list.append(ktemp[0][0, 0])
  148. # k_g1_list.append(ktemp[0][0, 1])
  149. # k_g2_list.append(ktemp[0][0, 2])
  150. km = compute_kernel(Gn_mix, gkernel, True)
  151. # k_list = np.diag(km) # kernel between each graph and itself.
  152. # k_g1_list = km[idx_gi[0]] # kernel between each graph and g1
  153. # k_g2_list = km[idx_gi[1]] # kernel between each graph and g2
  154. g_best = []
  155. dis_best = []
  156. # for each alpha
  157. for alpha in alpha_range:
  158. print('alpha =', alpha)
  159. dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
  160. range(len(Gn), len(Gn) + 2), km,
  161. k, r_max,gkernel)
  162. dis_best.append(dhat)
  163. g_best.append(ghat_list)
  164. for idx, item in enumerate(alpha_range):
  165. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  166. print('the corresponding pre-images are')
  167. for g in g_best[idx]:
  168. nx.draw_networkx(g)
  169. plt.show()
  170. print(g.nodes(data=True))
  171. print(g.edges(data=True))
  172. def test_remove_bests(Gn, gkernel):
  173. from gk_iam import gk_iam_nearest_multi, compute_kernel
  174. lmbda = 0.03 # termination probalility
  175. r_max = 10 # recursions
  176. l = 500
  177. alpha_range = np.linspace(0.5, 0.5, 1)
  178. k = 20 # k nearest neighbors
  179. # randomly select two molecules
  180. np.random.seed(1)
  181. idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
  182. g1 = Gn[idx_gi[0]]
  183. g2 = Gn[idx_gi[1]]
  184. # remove the best 2 graphs.
  185. del Gn[idx_gi[0]]
  186. del Gn[idx_gi[1] - 1]
  187. # del Gn[8]
  188. Gn_mix = [g.copy() for g in Gn]
  189. Gn_mix.append(g1.copy())
  190. Gn_mix.append(g2.copy())
  191. # compute
  192. km = compute_kernel(Gn_mix, gkernel, True)
  193. g_best = []
  194. dis_best = []
  195. # for each alpha
  196. for alpha in alpha_range:
  197. print('alpha =', alpha)
  198. dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
  199. range(len(Gn), len(Gn) + 2), km,
  200. k, r_max, gkernel)
  201. dis_best.append(dhat)
  202. g_best.append(ghat_list)
  203. for idx, item in enumerate(alpha_range):
  204. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  205. print('the corresponding pre-images are')
  206. for g in g_best[idx]:
  207. draw_Letter_graph(g)
  208. # nx.draw_networkx(g)
  209. # plt.show()
  210. print(g.nodes(data=True))
  211. print(g.edges(data=True))
  212. ###############################################################################
  213. # Tests on dataset Letter-H.
  214. def test_gkiam_letter_h():
  215. from gk_iam import gk_iam_nearest_multi, compute_kernel
  216. from iam import median_distance
  217. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  218. 'extra_params': {}} # node nsymb
  219. # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
  220. # 'extra_params': {}} # node nsymb
  221. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  222. gkernel = 'structuralspkernel'
  223. lmbda = 0.03 # termination probalility
  224. r_max = 3 # recursions
  225. # alpha_range = np.linspace(0.5, 0.5, 1)
  226. k = 10 # k nearest neighbors
  227. # classify graphs according to letters.
  228. idx_dict = get_same_item_indices(y_all)
  229. time_list = []
  230. sod_ks_min_list = []
  231. sod_gs_list = []
  232. sod_gs_min_list = []
  233. nb_updated_list = []
  234. for letter in idx_dict:
  235. print('\n-------------------------------------------------------\n')
  236. Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
  237. Gn_mix = Gn_let + [g.copy() for g in Gn_let]
  238. alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
  239. # compute
  240. time0 = time.time()
  241. km = compute_kernel(Gn_mix, gkernel, True)
  242. g_best = []
  243. dis_best = []
  244. # for each alpha
  245. for alpha in alpha_range:
  246. print('alpha =', alpha)
  247. dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let,
  248. Gn_let, [alpha] * len(Gn_let), range(len(Gn_let), len(Gn_mix)),
  249. km, k, r_max, gkernel, c_ei=1.7, c_er=1.7, c_es=1.7,
  250. ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
  251. dis_best.append(dhat)
  252. g_best.append(ghat_list)
  253. time_list.append(time.time() - time0)
  254. # show best graphs and save them to file.
  255. for idx, item in enumerate(alpha_range):
  256. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  257. print('the corresponding pre-images are')
  258. for g in g_best[idx]:
  259. draw_Letter_graph(g, savepath='results/gk_iam/')
  260. # nx.draw_networkx(g)
  261. # plt.show()
  262. print(g.nodes(data=True))
  263. print(g.edges(data=True))
  264. # compute the corresponding sod in graph space. (alpha range not considered.)
  265. sod_tmp, _ = median_distance(g_best[0], Gn_let, ged_cost='LETTER',
  266. ged_method='IPFP', saveGXL='gedlib-letter')
  267. sod_gs_list.append(sod_tmp)
  268. sod_gs_min_list.append(np.min(sod_tmp))
  269. sod_ks_min_list.append(sod_ks)
  270. nb_updated_list.append(nb_updated)
  271. print('\nsods in graph space: ', sod_gs_list)
  272. print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)
  273. print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
  274. print('\nnumber of updates for each letter: ', nb_updated_list)
  275. print('\ntimes:', time_list)
  276. def get_same_item_indices(ls):
  277. """Get the indices of the same items in a list. Return a dict keyed by items.
  278. """
  279. idx_dict = {}
  280. for idx, item in enumerate(ls):
  281. if item in idx_dict:
  282. idx_dict[item].append(idx)
  283. else:
  284. idx_dict[item] = [idx]
  285. return idx_dict
  286. #def compute_letter_median_by_average(Gn):
  287. # return g_median
  288. def test_iam_letter_h():
  289. from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
  290. from gk_iam import dis_gstar, compute_kernel
  291. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  292. 'extra_params': {}} # node nsymb
  293. # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
  294. # 'extra_params': {}} # node nsymb
  295. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  296. lmbda = 0.03 # termination probalility
  297. # alpha_range = np.linspace(0.5, 0.5, 1)
  298. # classify graphs according to letters.
  299. idx_dict = get_same_item_indices(y_all)
  300. time_list = []
  301. sod_list = []
  302. sod_min_list = []
  303. for letter in idx_dict:
  304. Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
  305. alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
  306. # compute
  307. g_best = []
  308. dis_best = []
  309. time0 = time.time()
  310. # for each alpha
  311. for alpha in alpha_range:
  312. print('alpha =', alpha)
  313. ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
  314. Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7,
  315. ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
  316. dis_best.append(dhat)
  317. g_best.append(ghat_list)
  318. time_list.append(time.time() - time0)
  319. # show best graphs and save them to file.
  320. for idx, item in enumerate(alpha_range):
  321. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  322. print('the corresponding pre-images are')
  323. for g in g_best[idx]:
  324. draw_Letter_graph(g, savepath='results/iam/')
  325. # nx.draw_networkx(g)
  326. # plt.show()
  327. print(g.nodes(data=True))
  328. print(g.edges(data=True))
  329. # compute the corresponding sod in kernel space. (alpha range not considered.)
  330. gkernel = 'structuralspkernel'
  331. sod_tmp = []
  332. Gn_mix = g_best[0] + Gn_let
  333. km = compute_kernel(Gn_mix, gkernel, True)
  334. for ig, g in tqdm(enumerate(g_best[0]), desc='computing kernel sod', file=sys.stdout):
  335. dtemp = dis_gstar(ig, range(len(g_best[0]), len(Gn_mix)),
  336. [alpha_range[0]] * len(Gn_let), km, withterm3=False)
  337. sod_tmp.append(dtemp)
  338. sod_list.append(sod_tmp)
  339. sod_min_list.append(np.min(sod_tmp))
  340. print('\nsods in kernel space: ', sod_list)
  341. print('\nsmallest sod in kernel space for each letter: ', sod_min_list)
  342. print('\ntimes:', time_list)
  343. def test_random_preimage_letter_h():
  344. from preimage_random import preimage_random, compute_kernel
  345. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  346. 'extra_params': {}} # node nsymb
  347. # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
  348. # 'extra_params': {}} # node nsymb
  349. # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  350. # 'extra_params': {}} # node/edge symb
  351. # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
  352. # 'extra_params': {}}
  353. # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
  354. # 'extra_params': {}} # node symb
  355. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  356. gkernel = 'structuralspkernel'
  357. # lmbda = 0.03 # termination probalility
  358. r_max = 3 # 10 # recursions
  359. l = 500
  360. # alpha_range = np.linspace(0.5, 0.5, 1)
  361. #alpha_range = np.linspace(0.1, 0.9, 9)
  362. k = 10 # 5 # k nearest neighbors
  363. # classify graphs according to letters.
  364. idx_dict = get_same_item_indices(y_all)
  365. time_list = []
  366. sod_list = []
  367. sod_min_list = []
  368. for letter in idx_dict:
  369. print('\n-------------------------------------------------------\n')
  370. Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
  371. Gn_mix = Gn_let + [g.copy() for g in Gn_let]
  372. alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
  373. # compute
  374. time0 = time.time()
  375. km = compute_kernel(Gn_mix, gkernel, True)
  376. g_best = []
  377. dis_best = []
  378. # for each alpha
  379. for alpha in alpha_range:
  380. print('alpha =', alpha)
  381. dhat, ghat_list = preimage_random(Gn_let, Gn_let, [alpha] * len(Gn_let),
  382. range(len(Gn_let), len(Gn_mix)), km,
  383. k, r_max, gkernel, c_ei=1.7,
  384. c_er=1.7, c_es=1.7)
  385. dis_best.append(dhat)
  386. g_best.append(ghat_list)
  387. time_list.append(time.time() - time0)
  388. # show best graphs and save them to file.
  389. for idx, item in enumerate(alpha_range):
  390. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  391. print('the corresponding pre-images are')
  392. for g in g_best[idx]:
  393. draw_Letter_graph(g, savepath='results/gk_iam/')
  394. # nx.draw_networkx(g)
  395. # plt.show()
  396. print(g.nodes(data=True))
  397. print(g.edges(data=True))
  398. # compute the corresponding sod in graph space. (alpha range not considered.)
  399. sod_tmp, _ = median_distance(g_best[0], Gn_let)
  400. sod_list.append(sod_tmp)
  401. sod_min_list.append(np.min(sod_tmp))
  402. print('\nsods in graph space: ', sod_list)
  403. print('\nsmallest sod in graph space for each letter: ', sod_min_list)
  404. print('\ntimes:', time_list)
  405. def test_gkiam_mutag():
  406. from gk_iam import gk_iam_nearest_multi, compute_kernel
  407. from iam import median_distance
  408. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  409. 'extra_params': {}} # node nsymb
  410. # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
  411. # 'extra_params': {}} # node nsymb
  412. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  413. gkernel = 'structuralspkernel'
  414. lmbda = 0.03 # termination probalility
  415. r_max = 3 # recursions
  416. # alpha_range = np.linspace(0.5, 0.5, 1)
  417. k = 20 # k nearest neighbors
  418. # classify graphs according to letters.
  419. idx_dict = get_same_item_indices(y_all)
  420. time_list = []
  421. sod_ks_min_list = []
  422. sod_gs_list = []
  423. sod_gs_min_list = []
  424. nb_updated_list = []
  425. for letter in idx_dict:
  426. print('\n-------------------------------------------------------\n')
  427. Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
  428. Gn_mix = Gn_let + [g.copy() for g in Gn_let]
  429. alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
  430. # compute
  431. time0 = time.time()
  432. km = compute_kernel(Gn_mix, gkernel, True)
  433. g_best = []
  434. dis_best = []
  435. # for each alpha
  436. for alpha in alpha_range:
  437. print('alpha =', alpha)
  438. dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let, Gn_let, [alpha] * len(Gn_let),
  439. range(len(Gn_let), len(Gn_mix)), km,
  440. k, r_max, gkernel, c_ei=1.7,
  441. c_er=1.7, c_es=1.7)
  442. dis_best.append(dhat)
  443. g_best.append(ghat_list)
  444. time_list.append(time.time() - time0)
  445. # show best graphs and save them to file.
  446. for idx, item in enumerate(alpha_range):
  447. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  448. print('the corresponding pre-images are')
  449. for g in g_best[idx]:
  450. draw_Letter_graph(g, savepath='results/gk_iam/')
  451. # nx.draw_networkx(g)
  452. # plt.show()
  453. print(g.nodes(data=True))
  454. print(g.edges(data=True))
  455. # compute the corresponding sod in graph space. (alpha range not considered.)
  456. sod_tmp, _ = median_distance(g_best[0], Gn_let)
  457. sod_gs_list.append(sod_tmp)
  458. sod_gs_min_list.append(np.min(sod_tmp))
  459. sod_ks_min_list.append(sod_ks)
  460. nb_updated_list.append(nb_updated)
  461. print('\nsods in graph space: ', sod_gs_list)
  462. print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)
  463. print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
  464. print('\nnumber of updates for each letter: ', nb_updated_list)
  465. print('\ntimes:', time_list)
  466. ###############################################################################
  467. # Re-test.
  468. def retest_the_simple_two():
  469. from gk_iam import gk_iam_nearest_multi, compute_kernel
  470. from iam import median_distance
  471. from test_random_mutag import remove_edges
  472. # The two simple graphs.
  473. # g1 = nx.Graph(name='haha')
  474. # g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
  475. # g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
  476. # g2 = nx.Graph(name='hahaha')
  477. # g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
  478. # (3, {'atom': 'O'}), (4, {'atom': 'C'})])
  479. # g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
  480. # (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
  481. g1 = nx.Graph(name='haha')
  482. g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
  483. (3, {'atom': 'S'}), (4, {'atom': 'S'})])
  484. g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
  485. (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
  486. g2 = nx.Graph(name='hahaha')
  487. g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
  488. (3, {'atom': 'O'}), (4, {'atom': 'O'})])
  489. g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
  490. (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
  491. # # randomly select two molecules
  492. # np.random.seed(1)
  493. # idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
  494. # g1 = Gn[idx_gi[0]]
  495. # g2 = Gn[idx_gi[1]]
  496. # Gn_mix = [g.copy() for g in Gn]
  497. # Gn_mix.append(g1.copy())
  498. # Gn_mix.append(g2.copy())
  499. Gn = [g1.copy(), g2.copy()]
  500. remove_edges(Gn)
  501. gkernel = 'marginalizedkernel'
  502. lmbda = 0.03 # termination probalility
  503. r_max = 10 # recursions
  504. # l = 500
  505. alpha_range = np.linspace(0.5, 0.5, 1)
  506. k = 2 # k nearest neighbors
  507. epsilon = 1e-6
  508. ged_cost='CHEM_1'
  509. ged_method='IPFP'
  510. saveGXL='gedlib'
  511. c_ei=1
  512. c_er=1
  513. c_es=1
  514. Gn_mix = Gn + [g1.copy(), g2.copy()]
  515. # compute
  516. time0 = time.time()
  517. km = compute_kernel(Gn_mix, gkernel, True)
  518. time_km = time.time() - time0
  519. time_list = []
  520. sod_ks_min_list = []
  521. sod_gs_list = []
  522. sod_gs_min_list = []
  523. nb_updated_list = []
  524. g_best = []
  525. # for each alpha
  526. for alpha in alpha_range:
  527. print('\n-------------------------------------------------------\n')
  528. print('alpha =', alpha)
  529. time0 = time.time()
  530. dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2],
  531. [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
  532. gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon,
  533. ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL)
  534. time_total = time.time() - time0 + time_km
  535. print('time: ', time_total)
  536. time_list.append(time_total)
  537. sod_ks_min_list.append(dhat)
  538. g_best.append(ghat_list)
  539. nb_updated_list.append(nb_updated)
  540. # show best graphs and save them to file.
  541. for idx, item in enumerate(alpha_range):
  542. print('when alpha is', item, 'the shortest distance is', sod_ks_min_list[idx])
  543. print('one of the possible corresponding pre-images is')
  544. nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
  545. with_labels=True)
  546. plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
  547. plt.show()
  548. print(g_best[idx][0].nodes(data=True))
  549. print(g_best[idx][0].edges(data=True))
  550. # for g in g_best[idx]:
  551. # draw_Letter_graph(g, savepath='results/gk_iam/')
  552. ## nx.draw_networkx(g)
  553. ## plt.show()
  554. # print(g.nodes(data=True))
  555. # print(g.edges(data=True))
  556. # compute the corresponding sod in graph space.
  557. for idx, item in enumerate(alpha_range):
  558. sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
  559. ged_method=ged_method, saveGXL=saveGXL)
  560. sod_gs_list.append(sod_tmp)
  561. sod_gs_min_list.append(np.min(sod_tmp))
  562. print('\nsods in graph space: ', sod_gs_list)
  563. print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
  564. print('\nsmallest sod in kernel space for each alpha: ', sod_ks_min_list)
  565. print('\nnumber of updates for each alpha: ', nb_updated_list)
  566. print('\ntimes:', time_list)
  567. if __name__ == '__main__':
  568. # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  569. # 'extra_params': {}} # node/edge symb
  570. # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  571. # 'extra_params': {}} # node nsymb
  572. # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
  573. # 'extra_params': {}}
  574. # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
  575. # 'extra_params': {}} # node symb
  576. # Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  577. # Gn = Gn[0:20]
  578. # import networkx.algorithms.isomorphism as iso
  579. # G1 = nx.MultiDiGraph()
  580. # G2 = nx.MultiDiGraph()
  581. # G1.add_nodes_from([1,2,3], fill='red')
  582. # G2.add_nodes_from([10,20,30,40], fill='red')
  583. # nx.add_path(G1, [1,2,3,4], weight=3, linewidth=2.5)
  584. # nx.add_path(G2, [10,20,30,40], weight=3)
  585. # nm = iso.categorical_node_match('fill', 'red')
  586. # print(nx.is_isomorphic(G1, G2, node_match=nm))
  587. #
  588. # test_new_IAM_allGraph_deleteNodes(Gn)
  589. # test_will_IAM_give_the_median_graph_we_wanted(Gn)
  590. # test_who_is_the_closest_in_GED_space(Gn)
  591. # test_who_is_the_closest_in_kernel_space(Gn)
  592. # test_the_simple_two(Gn, 'untilhpathkernel')
  593. # test_remove_bests(Gn, 'untilhpathkernel')
  594. # test_gkiam_letter_h()
  595. # test_iam_letter_h()
  596. # test_random_preimage_letter_h
  597. ###############################################################################
  598. # retests.
  599. retest_the_simple_two()

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.