You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

test_others.py 27 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Jul 4 12:20:16 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import networkx as nx
  9. import matplotlib.pyplot as plt
  10. import time
  11. from tqdm import tqdm
  12. from gklearn.utils.graphfiles import loadDataset
  13. from gklearn.preimage.median import draw_Letter_graph
  14. from gklearn.preimage.ged import GED, ged_median
  15. from gklearn.preimage.utils import get_same_item_indices, compute_kernel, gram2distances, \
  16. dis_gstar, remove_edges
  17. # --------------------------- These are tests --------------------------------#
  def test_who_is_the_closest_in_kernel_space(Gn):
      """Print how close each graph is, in kernel space, to two reference graphs.

      A hand-made "median" candidate is built by copying ``Gn[6]`` and removing
      its node ``0``. The candidate is drawn, printed and placed in front of the
      graphs of ``Gn``; the Gram matrix of that extended list is computed with
      the ``'untilhpathkernel'`` kernel and turned into a distance matrix with
      ``gram2distances``. For each of the two reference graphs (``Gn[0]`` and
      ``Gn[6]``) the sorted distances and the sorting order are printed, followed
      by the same two printouts for the average distance to both references.

      Parameters
      ----------
      Gn : list
          Graphs offering ``copy``, ``remove_node`` and ``nodes``; indices 0 and
          6 must exist. The name is rebound locally, the caller's list object is
          not reassigned.
      """
      ref_positions = (0, 6)
      _, seed_graph = Gn[ref_positions[0]], Gn[ref_positions[1]]

      # Build the hand-made "median" graph.
      candidate = seed_graph.copy()
      candidate.remove_node(0)
      nx.draw_networkx(candidate)
      plt.show()
      print(candidate.nodes(data=True))

      # The candidate occupies index 0, so every original graph moves up by one.
      graphs = [candidate] + Gn
      row_a, row_b = (pos + 1 for pos in ref_positions)

      # Gram matrix, then the distance matrix derived from it.
      gram = compute_kernel(graphs, 'untilhpathkernel', True)
      distances = gram2distances(gram)

      for row in (row_a, row_b):
          print(np.sort(distances[row]))
          print(np.argsort(distances[row]))

      # For every graph g: (d(g1, g) + d(g2, g)) / 2.
      mean_to_refs = []
      for i in range(len(graphs)):
          mean_to_refs.append((distances[i, row_a] + distances[i, row_b]) / 2)
      print(np.sort(mean_to_refs))
      print(np.argsort(mean_to_refs))
  def test_who_is_the_closest_in_GED_space(Gn):
      """Print how close each graph is, in GED space, to two reference graphs.

      A hand-made "median" candidate is built by copying ``Gn[6]`` and removing
      its node ``0``. The candidate is drawn, printed and placed in front of the
      graphs of ``Gn``; the full pairwise matrix of graph edit distances is then
      computed with ``GED(..., lib='gedlib')``. For each of the two reference
      graphs (``Gn[0]`` and ``Gn[6]``) the sorted distances and the sorting order
      are printed, followed by the same two printouts for the average distance
      to both references.

      Parameters
      ----------
      Gn : list
          Graphs offering ``copy``, ``remove_node`` and ``nodes``; indices 0 and
          6 must exist.
      """
      # `sys` is not imported at the top of this file; without this import the
      # tqdm call below fails with a NameError on `sys.stdout`.
      import sys

      idx_gi = [0, 6]
      g2 = Gn[idx_gi[1]]

      # create the "median" graph.
      gnew = g2.copy()
      gnew.remove_node(0)
      nx.draw_networkx(gnew)
      plt.show()
      print(gnew.nodes(data=True))

      # gnew sits at index 0, so the reference graphs are found at idx + 1.
      Gn = [gnew] + Gn
      nb_graphs = len(Gn)
      ref0, ref1 = idx_gi[0] + 1, idx_gi[1] + 1

      # compute GEDs (every ordered pair is evaluated; no symmetry is assumed).
      ged_matrix = np.zeros((nb_graphs, nb_graphs))
      for i1 in tqdm(range(nb_graphs), desc='computing GEDs', file=sys.stdout):
          for i2 in range(nb_graphs):
              dis, _, _ = GED(Gn[i1], Gn[i2], lib='gedlib')
              ged_matrix[i1, i2] = dis

      print(np.sort(ged_matrix[ref0]))
      print(np.argsort(ged_matrix[ref0]))
      print(np.sort(ged_matrix[ref1]))
      print(np.argsort(ged_matrix[ref1]))

      # for all g in Gn, compute (GED(g1, g) + GED(g2, g)) / 2
      dis_median = (ged_matrix[:, ref0] + ged_matrix[:, ref1]) / 2
      print(np.sort(dis_median))
      print(np.argsort(dis_median))
  def test_will_IAM_give_the_median_graph_we_wanted(Gn):
      """Run IAM on copies of ``Gn[0]`` and ``Gn[6]`` and display the result.

      The two copies are used both as the graph set and as the initial
      candidates, with all three edit costs set to 1. The returned graph is
      drawn and its nodes and edges are printed.

      Parameters
      ----------
      Gn : list
          Graphs offering ``copy``; indices 0 and 6 must exist.
      """
      # The helper is neither defined nor imported anywhere in this file, so the
      # call below raised a NameError.
      # NOTE(review): it is assumed to live in the `iam` module, next to the
      # helper that `test_iam_letter_h` imports from there -- confirm.
      from iam import test_iam_with_more_graphs_as_init

      idx_gi = [0, 6]
      g1 = Gn[idx_gi[0]].copy()
      g2 = Gn[idx_gi[1]].copy()

      # Alternative kept from the original: run on the whole set instead, i.e.
      # test_iam_with_more_graphs_as_init(Gn, Gn, c_ei=1, c_er=1, c_es=1).
      g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2],
                                                   c_ei=1, c_er=1, c_es=1)
      nx.draw_networkx(g_median)
      plt.show()
      print(g_median.nodes(data=True))
      print(g_median.edges(data=True))
  def test_new_IAM_allGraph_deleteNodes(Gn):
      """Run the node-deleting IAM variant on two small hand-built graphs.

      Both graphs have three 'C' nodes in a chain (0-1-2) and two extra nodes
      (3 and 4) attached to node 2; the extra nodes are 'S' in the first graph
      and 'O' in the second. Every edge carries ``bond_type`` ``'1'``. Each
      input graph and the IAM result are drawn and their nodes and edges are
      printed.

      Parameters
      ----------
      Gn : list
          Not used by the current body (it only appeared in commented-out
          alternatives); kept so existing callers keep working.
      """
      # The helper was called without being imported (NameError); import it the
      # same way `test_iam_letter_h` does.
      from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations

      def build_graph(name, leaf_atom):
          # Chain C-C-C with two `leaf_atom` nodes hanging off node 2.
          g = nx.Graph(name=name)
          g.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
                            (3, {'atom': leaf_atom}), (4, {'atom': leaf_atom})])
          g.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
                            (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
          return g

      g1 = build_graph('haha', 'S')
      g2 = build_graph('hahaha', 'O')

      for g in (g1, g2):
          nx.draw_networkx(g)
          plt.show()
          print(g.nodes(data=True))
          print(g.edges(data=True))

      # NOTE(review): `test_iam_letter_h` unpacks two values from this same
      # helper, whereas the result is used here as a single graph -- confirm the
      # helper's return type.
      g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
          [g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
      nx.draw_networkx(g_median)
      plt.show()
      print(g_median.nodes(data=True))
      print(g_median.edges(data=True))
  def test_the_simple_two(Gn, gkernel):
      """Search a pre-image between ``Gn[0]`` and ``Gn[6]`` with GK-IAM.

      The Gram matrix is computed over copies of all graphs in ``Gn`` followed
      by copies of the two reference graphs, so the references sit at the last
      two indices. ``gk_iam_nearest_multi`` is then run for every alpha in
      ``alpha_range`` (currently the single value 0.5) and, for each alpha, the
      best distance is printed and every returned graph is drawn and printed.

      Parameters
      ----------
      Gn : list
          Graphs offering ``copy``; indices 0 and 6 must exist.
      gkernel : str
          Kernel name forwarded to ``compute_kernel`` and
          ``gk_iam_nearest_multi``.
      """
      from gk_iam import gk_iam_nearest_multi

      r_max = 10  # recursions
      alpha_range = np.linspace(0.5, 0.5, 1)
      k = 2  # k nearest neighbors

      # The random selection is disabled (fixed indices below); the seed call is
      # kept because it still resets NumPy's global random state.
      np.random.seed(1)
      idx_gi = [0, 6]  # np.random.randint(0, len(Gn), 2)
      g1 = Gn[idx_gi[0]]
      g2 = Gn[idx_gi[1]]

      Gn_mix = [g.copy() for g in Gn]
      Gn_mix.append(g1.copy())
      Gn_mix.append(g2.copy())

      km = compute_kernel(Gn_mix, gkernel, True)

      g_best = []
      dis_best = []
      # for each alpha
      for alpha in alpha_range:
          print('alpha =', alpha)
          # Other callers in this file unpack four values from this function;
          # star-unpacking accepts both the two- and the four-value form.
          dhat, ghat_list, *_ = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
                                                     range(len(Gn), len(Gn) + 2), km,
                                                     k, r_max, gkernel)
          dis_best.append(dhat)
          g_best.append(ghat_list)

      for item, dhat, ghat_list in zip(alpha_range, dis_best, g_best):
          print('when alpha is', item, 'the shortest distance is', dhat)
          print('the corresponding pre-images are')
          for g in ghat_list:
              nx.draw_networkx(g)
              plt.show()
              print(g.nodes(data=True))
              print(g.edges(data=True))
  def test_remove_bests(Gn, gkernel):
      """Search a pre-image of ``Gn[0]`` and ``Gn[6]`` with both removed from the set.

      Same experiment as the simple two-graph test, except that the two
      reference graphs are excluded from the graph set handed to
      ``gk_iam_nearest_multi``. They are still appended (as copies) to the list
      used for the Gram matrix, where they occupy the last two indices.

      Parameters
      ----------
      Gn : list
          Graphs offering ``copy``; indices 0 and 6 must exist. The list is no
          longer modified in place: the original deleted both entries from the
          caller's list, which changed the input of any test run afterwards.
      gkernel : str
          Kernel name forwarded to ``compute_kernel`` and
          ``gk_iam_nearest_multi``.
      """
      from gk_iam import gk_iam_nearest_multi

      r_max = 10  # recursions
      alpha_range = np.linspace(0.5, 0.5, 1)
      k = 20  # k nearest neighbors

      # The random selection is disabled (fixed indices below); the seed call is
      # kept because it still resets NumPy's global random state.
      np.random.seed(1)
      idx_gi = [0, 6]  # np.random.randint(0, len(Gn), 2)
      g1 = Gn[idx_gi[0]]
      g2 = Gn[idx_gi[1]]

      # remove the best 2 graphs (on a local list only).
      Gn = [g for i, g in enumerate(Gn) if i not in idx_gi]

      Gn_mix = [g.copy() for g in Gn]
      Gn_mix.append(g1.copy())
      Gn_mix.append(g2.copy())

      # compute
      km = compute_kernel(Gn_mix, gkernel, True)

      g_best = []
      dis_best = []
      # for each alpha
      for alpha in alpha_range:
          print('alpha =', alpha)
          # Other callers in this file unpack four values from this function;
          # star-unpacking accepts both the two- and the four-value form.
          dhat, ghat_list, *_ = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
                                                     range(len(Gn), len(Gn) + 2), km,
                                                     k, r_max, gkernel)
          dis_best.append(dhat)
          g_best.append(ghat_list)

      for item, dhat, ghat_list in zip(alpha_range, dis_best, g_best):
          print('when alpha is', item, 'the shortest distance is', dhat)
          print('the corresponding pre-images are')
          for g in ghat_list:
              draw_Letter_graph(g)
              print(g.nodes(data=True))
              print(g.edges(data=True))
  212. ###############################################################################
  213. # Tests on dataset Letter-H.
  def test_gkiam_letter_h():
      """Run GK-IAM once per label group of the Letter-high dataset.

      The dataset is loaded from ``../datasets/Letter-high/Letter-high_A.txt``
      and split into groups with ``get_same_item_indices``. For each group the
      Gram matrix of the group's graphs followed by a second copy of them is
      computed, ``gk_iam_nearest_multi`` is run with equal weights
      ``1 / len(group)``, the resulting graphs are drawn to
      ``results/gk_iam/`` and printed, and ``ged_median`` is evaluated on the
      first result list. Per-group timings, sums of distances and update counts
      are printed at the end.
      """
      from gk_iam import gk_iam_nearest_multi

      ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
            'extra_params': {}}  # node nsymb
      # Alternative dataset: '../datasets/Letter-med/Letter-med_A.txt'.
      Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

      gkernel = 'structuralspkernel'
      lmbda = 0.03  # termination probability (not used in this function)
      r_max = 3  # recursions
      k = 10  # k nearest neighbors

      # Group the graph indices by label (letters).
      idx_dict = get_same_item_indices(y_all)

      elapsed_per_letter = []
      kernel_sod_min = []
      graph_sods = []
      graph_sod_min = []
      updates_per_letter = []
      for letter in idx_dict:
          print('\n-------------------------------------------------------\n')
          letter_graphs = [Gn[i].copy() for i in idx_dict[letter]]
          nb_letter = len(letter_graphs)
          # First half: the graph set; second half: copies acting as references.
          stacked = letter_graphs + [g.copy() for g in letter_graphs]
          alpha_range = np.linspace(1 / nb_letter, 1 / nb_letter, 1)

          started = time.time()
          km = compute_kernel(stacked, gkernel, True)

          best_graphs = []
          best_dists = []
          for alpha in alpha_range:
              print('alpha =', alpha)
              dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(
                  letter_graphs, letter_graphs, [alpha] * nb_letter,
                  range(nb_letter, len(stacked)), km, k, r_max, gkernel,
                  c_ei=1.7, c_er=1.7, c_es=1.7,
                  ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
              best_dists.append(dhat)
              best_graphs.append(ghat_list)
          elapsed_per_letter.append(time.time() - started)

          # Show the best graphs and save them to file.
          for item, dist, graphs in zip(alpha_range, best_dists, best_graphs):
              print('when alpha is', item, 'the shortest distance is', dist)
              print('the corresponding pre-images are')
              for g in graphs:
                  draw_Letter_graph(g, savepath='results/gk_iam/')
                  print(g.nodes(data=True))
                  print(g.edges(data=True))

          # Sum of distances in graph space (only the first alpha is considered).
          sod_tmp, _ = ged_median(best_graphs[0], letter_graphs, ged_cost='LETTER',
                                  ged_method='IPFP', saveGXL='gedlib-letter')
          graph_sods.append(sod_tmp)
          graph_sod_min.append(np.min(sod_tmp))
          kernel_sod_min.append(sod_ks)
          updates_per_letter.append(nb_updated)

      print('\nsods in graph space: ', graph_sods)
      print('\nsmallest sod in graph space for each letter: ', graph_sod_min)
      print('\nsmallest sod in kernel space for each letter: ', kernel_sod_min)
      print('\nnumber of updates for each letter: ', updates_per_letter)
      print('\ntimes:', elapsed_per_letter)
  275. #def compute_letter_median_by_average(Gn):
  276. # return g_median
  def test_iam_letter_h():
      """Run the node-deleting IAM variant once per label group of Letter-high.

      The dataset is loaded from ``../datasets/Letter-high/Letter-high_A.txt``
      and split into groups with ``get_same_item_indices``. For each group the
      IAM helper is run on the group's graphs (used both as set and as initial
      candidates), the resulting graphs are drawn to ``results/iam/`` and
      printed, and the kernel-space sum of distances of every result graph to
      the group is computed with ``dis_gstar`` using the
      ``'structuralspkernel'`` kernel. Per-group timings and sums of distances
      are printed at the end.
      """
      # `sys` is not imported at the top of this file; without this import the
      # tqdm call below fails with a NameError on `sys.stdout`.
      import sys
      from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations

      ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
            'extra_params': {}}  # node nsymb
      # Alternative dataset: '../datasets/Letter-med/Letter-med_A.txt'.
      Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

      gkernel = 'structuralspkernel'

      # classify graphs according to letters.
      idx_dict = get_same_item_indices(y_all)
      time_list = []
      sod_list = []
      sod_min_list = []
      for letter in idx_dict:
          Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
          alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)

          # compute
          g_best = []
          dis_best = []
          time0 = time.time()
          # for each alpha
          for alpha in alpha_range:
              print('alpha =', alpha)
              ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
                  Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7,
                  ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
              dis_best.append(dhat)
              g_best.append(ghat_list)
          time_list.append(time.time() - time0)

          # show best graphs and save them to file.
          for item, dhat, ghat_list in zip(alpha_range, dis_best, g_best):
              print('when alpha is', item, 'the shortest distance is', dhat)
              print('the corresponding pre-images are')
              for g in ghat_list:
                  draw_Letter_graph(g, savepath='results/iam/')
                  print(g.nodes(data=True))
                  print(g.edges(data=True))

          # compute the corresponding sod in kernel space. (alpha range not considered.)
          candidates = g_best[0]
          # Candidates come first in the Gram matrix, the group's graphs after.
          Gn_mix = candidates + Gn_let
          km = compute_kernel(Gn_mix, gkernel, True)
          sod_tmp = []
          # `total` is given explicitly because enumerate() has no length.
          for ig, g in tqdm(enumerate(candidates), desc='computing kernel sod',
                            total=len(candidates), file=sys.stdout):
              dtemp = dis_gstar(ig, range(len(candidates), len(Gn_mix)),
                                [alpha_range[0]] * len(Gn_let), km, withterm3=False)
              sod_tmp.append(dtemp)
          sod_list.append(sod_tmp)
          sod_min_list.append(np.min(sod_tmp))

      print('\nsods in kernel space: ', sod_list)
      print('\nsmallest sod in kernel space for each letter: ', sod_min_list)
      print('\ntimes:', time_list)
  def test_random_preimage_letter_h():
      """Run the random pre-image search once per label group of Letter-high.

      The dataset is loaded from ``../datasets/Letter-high/Letter-high_A.txt``
      and split into groups with ``get_same_item_indices``. For each group the
      Gram matrix of the group's graphs followed by a second copy of them is
      computed, ``preimage_random`` is run with equal weights
      ``1 / len(group)``, the resulting graphs are drawn to
      ``results/gk_iam/`` and printed, and ``ged_median`` is evaluated on the
      first result list. Per-group timings and sums of distances are printed at
      the end.
      """
      from preimage_random import preimage_random

      ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
            'extra_params': {}}  # node nsymb
      # Other datasets tried by the original author:
      #   '../datasets/Letter-med/Letter-med_A.txt'        (node nsymb)
      #   '../datasets/MUTAG/MUTAG_A.txt'                  (node/edge symb)
      #   '../datasets/monoterpenoides/trainset_9.ds'
      #   '../datasets/acyclic/dataset_bps.ds'             (node symb)
      Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

      gkernel = 'structuralspkernel'
      r_max = 3  # 10 # recursions
      l = 500  # not used in this function
      k = 10  # 5 # k nearest neighbors

      # Group the graph indices by label (letters).
      idx_dict = get_same_item_indices(y_all)

      elapsed_per_letter = []
      graph_sods = []
      graph_sod_min = []
      for letter in idx_dict:
          print('\n-------------------------------------------------------\n')
          letter_graphs = [Gn[i].copy() for i in idx_dict[letter]]
          nb_letter = len(letter_graphs)
          # First half: the graph set; second half: copies acting as references.
          stacked = letter_graphs + [g.copy() for g in letter_graphs]
          alpha_range = np.linspace(1 / nb_letter, 1 / nb_letter, 1)

          started = time.time()
          km = compute_kernel(stacked, gkernel, True)

          best_graphs = []
          best_dists = []
          for alpha in alpha_range:
              print('alpha =', alpha)
              dhat, ghat_list = preimage_random(letter_graphs, letter_graphs,
                                                [alpha] * nb_letter,
                                                range(nb_letter, len(stacked)), km,
                                                k, r_max, gkernel, c_ei=1.7,
                                                c_er=1.7, c_es=1.7)
              best_dists.append(dhat)
              best_graphs.append(ghat_list)
          elapsed_per_letter.append(time.time() - started)

          # Show the best graphs and save them to file.
          for item, dist, graphs in zip(alpha_range, best_dists, best_graphs):
              print('when alpha is', item, 'the shortest distance is', dist)
              print('the corresponding pre-images are')
              for g in graphs:
                  draw_Letter_graph(g, savepath='results/gk_iam/')
                  print(g.nodes(data=True))
                  print(g.edges(data=True))

          # Sum of distances in graph space (only the first alpha is considered).
          sod_tmp, _ = ged_median(best_graphs[0], letter_graphs)
          graph_sods.append(sod_tmp)
          graph_sod_min.append(np.min(sod_tmp))

      print('\nsods in graph space: ', graph_sods)
      print('\nsmallest sod in graph space for each letter: ', graph_sod_min)
      print('\ntimes:', elapsed_per_letter)
  def test_gkiam_mutag():
      """Run GK-IAM once per label group of the dataset configured below.

      NOTE(review): despite the function name, the dataset path points at
      Letter-high and the results are drawn with ``draw_Letter_graph`` --
      confirm whether MUTAG was intended.

      The dataset is split into groups with ``get_same_item_indices``. For each
      group the Gram matrix of the group's graphs followed by a second copy of
      them is computed, ``gk_iam_nearest_multi`` is run with equal weights
      ``1 / len(group)``, the resulting graphs are drawn to
      ``results/gk_iam/`` and printed, and ``ged_median`` is evaluated on the
      first result list. Per-group timings, sums of distances and update counts
      are printed at the end.
      """
      from gk_iam import gk_iam_nearest_multi

      ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
            'extra_params': {}}  # node nsymb
      # Alternative dataset: '../datasets/Letter-med/Letter-med_A.txt'.
      Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

      gkernel = 'structuralspkernel'
      lmbda = 0.03  # termination probability (not used in this function)
      r_max = 3  # recursions
      k = 20  # k nearest neighbors

      # Group the graph indices by label (letters).
      idx_dict = get_same_item_indices(y_all)

      elapsed_per_letter = []
      kernel_sod_min = []
      graph_sods = []
      graph_sod_min = []
      updates_per_letter = []
      for letter in idx_dict:
          print('\n-------------------------------------------------------\n')
          letter_graphs = [Gn[i].copy() for i in idx_dict[letter]]
          nb_letter = len(letter_graphs)
          # First half: the graph set; second half: copies acting as references.
          stacked = letter_graphs + [g.copy() for g in letter_graphs]
          alpha_range = np.linspace(1 / nb_letter, 1 / nb_letter, 1)

          started = time.time()
          km = compute_kernel(stacked, gkernel, True)

          best_graphs = []
          best_dists = []
          for alpha in alpha_range:
              print('alpha =', alpha)
              dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(
                  letter_graphs, letter_graphs, [alpha] * nb_letter,
                  range(nb_letter, len(stacked)), km,
                  k, r_max, gkernel, c_ei=1.7,
                  c_er=1.7, c_es=1.7)
              best_dists.append(dhat)
              best_graphs.append(ghat_list)
          elapsed_per_letter.append(time.time() - started)

          # Show the best graphs and save them to file.
          for item, dist, graphs in zip(alpha_range, best_dists, best_graphs):
              print('when alpha is', item, 'the shortest distance is', dist)
              print('the corresponding pre-images are')
              for g in graphs:
                  draw_Letter_graph(g, savepath='results/gk_iam/')
                  print(g.nodes(data=True))
                  print(g.edges(data=True))

          # Sum of distances in graph space (only the first alpha is considered).
          sod_tmp, _ = ged_median(best_graphs[0], letter_graphs)
          graph_sods.append(sod_tmp)
          graph_sod_min.append(np.min(sod_tmp))
          kernel_sod_min.append(sod_ks)
          updates_per_letter.append(nb_updated)

      print('\nsods in graph space: ', graph_sods)
      print('\nsmallest sod in graph space for each letter: ', graph_sod_min)
      print('\nsmallest sod in kernel space for each letter: ', kernel_sod_min)
      print('\nnumber of updates for each letter: ', updates_per_letter)
      print('\ntimes:', elapsed_per_letter)
  453. ###############################################################################
  454. # Re-test.
  def retest_the_simple_two():
      """Re-run the two-graph GK-IAM experiment on two small hand-built graphs.

      Both graphs have three 'C' nodes in a chain (0-1-2) and two extra nodes
      (3 and 4) attached to node 2; the extra nodes are 'S' in the first graph
      and 'O' in the second. Copies of both graphs are passed through
      ``remove_edges`` and used as the graph set; the unmodified graphs are the
      references and occupy the last two indices of the Gram matrix, which is
      computed with the ``'marginalizedkernel'`` kernel.

      For every alpha in ``alpha_range`` (currently the single value 0.5),
      ``gk_iam_nearest_multi`` is run, the first returned graph is drawn, saved
      to ``results/gk_iam/mutag_alpha<alpha>.png`` and printed, and
      ``ged_median`` is evaluated on that alpha's result list. Timings (each
      including the Gram-matrix time), distances and update counts are printed
      at the end.
      """
      from gk_iam import gk_iam_nearest_multi

      def build_graph(name, leaf_atom):
          # Chain C-C-C with two `leaf_atom` nodes hanging off node 2.
          g = nx.Graph(name=name)
          g.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
                            (3, {'atom': leaf_atom}), (4, {'atom': leaf_atom})])
          g.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
                            (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
          return g

      # The two simple graphs.
      g1 = build_graph('haha', 'S')
      g2 = build_graph('hahaha', 'O')

      Gn = [g1.copy(), g2.copy()]
      remove_edges(Gn)

      gkernel = 'marginalizedkernel'
      r_max = 10  # recursions
      alpha_range = np.linspace(0.5, 0.5, 1)
      k = 2  # k nearest neighbors
      epsilon = 1e-6
      ged_cost = 'CHEM_1'
      ged_method = 'IPFP'
      saveGXL = 'gedlib'
      c_ei = 1
      c_er = 1
      c_es = 1

      Gn_mix = Gn + [g1.copy(), g2.copy()]

      # compute
      time0 = time.time()
      km = compute_kernel(Gn_mix, gkernel, True)
      time_km = time.time() - time0

      time_list = []
      sod_ks_min_list = []
      sod_gs_list = []
      sod_gs_min_list = []
      nb_updated_list = []
      g_best = []
      # for each alpha
      for alpha in alpha_range:
          print('\n-------------------------------------------------------\n')
          print('alpha =', alpha)
          time0 = time.time()
          dhat, ghat_list, _sod_ks, nb_updated = gk_iam_nearest_multi(
              Gn, [g1, g2], [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2),
              km, k, r_max, gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es,
              epsilon=epsilon, ged_cost=ged_cost, ged_method=ged_method,
              saveGXL=saveGXL)
          # The Gram matrix is shared by all alphas; its cost is added to each.
          time_total = time.time() - time0 + time_km
          print('time: ', time_total)
          time_list.append(time_total)
          sod_ks_min_list.append(dhat)
          g_best.append(ghat_list)
          nb_updated_list.append(nb_updated)

      # show best graphs and save them to file.
      for idx, item in enumerate(alpha_range):
          first_best = g_best[idx][0]
          print('when alpha is', item, 'the shortest distance is', sod_ks_min_list[idx])
          print('one of the possible corresponding pre-images is')
          nx.draw(first_best, labels=nx.get_node_attributes(first_best, 'atom'),
                  with_labels=True)
          plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
          plt.show()
          print(first_best.nodes(data=True))
          print(first_best.edges(data=True))

      # compute the corresponding sod in graph space.
      for idx in range(len(alpha_range)):
          # Use this alpha's own results; the original always read g_best[0],
          # which is only correct while alpha_range holds a single value.
          sod_tmp, _ = ged_median(g_best[idx], [g1, g2], ged_cost=ged_cost,
                                  ged_method=ged_method, saveGXL=saveGXL)
          sod_gs_list.append(sod_tmp)
          sod_gs_min_list.append(np.min(sod_tmp))

      print('\nsods in graph space: ', sod_gs_list)
      print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
      print('\nsmallest sod in kernel space for each alpha: ', sod_ks_min_list)
      print('\nnumber of updates for each alpha: ', nb_updated_list)
      print('\ntimes:', time_list)
  if __name__ == '__main__':
      # Only the re-test at the bottom is active. Everything else is kept as
      # comments so an experiment can be switched back on by hand.
      #
      # Dataset choices for the tests that take `Gn` (pick one):
      # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
      #       'extra_params': {}}  # node/edge symb
      # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
      #       'extra_params': {}}  # node nsymb
      # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
      #       'extra_params': {}}
      # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
      #       'extra_params': {}}  # node symb
      # Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
      # Gn = Gn[0:20]
      #
      # Isomorphism scratch check:
      # import networkx.algorithms.isomorphism as iso
      # G1 = nx.MultiDiGraph()
      # G2 = nx.MultiDiGraph()
      # G1.add_nodes_from([1,2,3], fill='red')
      # G2.add_nodes_from([10,20,30,40], fill='red')
      # nx.add_path(G1, [1,2,3,4], weight=3, linewidth=2.5)
      # nx.add_path(G2, [10,20,30,40], weight=3)
      # nm = iso.categorical_node_match('fill', 'red')
      # print(nx.is_isomorphic(G1, G2, node_match=nm))
      #
      # Tests that need `Gn`:
      # test_new_IAM_allGraph_deleteNodes(Gn)
      # test_will_IAM_give_the_median_graph_we_wanted(Gn)
      # test_who_is_the_closest_in_GED_space(Gn)
      # test_who_is_the_closest_in_kernel_space(Gn)
      # test_the_simple_two(Gn, 'untilhpathkernel')
      # test_remove_bests(Gn, 'untilhpathkernel')
      #
      # Tests that load their own dataset:
      # test_gkiam_letter_h()
      # test_iam_letter_h()
      # test_random_preimage_letter_h()

      ###############################################################################
      # retests.
      retest_the_simple_two()

A Python package for graph kernels, graph edit distances and graph pre-image problem.