You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_others.py 27 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Jul 4 12:20:16 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import networkx as nx
  9. import matplotlib.pyplot as plt
  10. import time
  11. from tqdm import tqdm
  12. import sys
  13. sys.path.insert(0, "../")
  14. from gklearn.utils.graphfiles import loadDataset
  15. from median import draw_Letter_graph
  16. from ged import GED, ged_median
  17. from utils import get_same_item_indices, compute_kernel, gram2distances, \
  18. dis_gstar, remove_edges
  19. # --------------------------- These are tests --------------------------------#
  def test_who_is_the_closest_in_kernel_space(Gn):
      """Print which graphs lie closest to two reference graphs in kernel space.

      A hand-made "median" candidate (a copy of ``Gn[6]`` without node ``0``)
      is drawn, printed and placed in front of the dataset, so every original
      graph sits one row later in the distance matrix.  The function then
      prints, for each reference graph (``Gn[0]`` and ``Gn[6]``), the sorted
      distances to all graphs and the matching ranking, followed by the same
      two lines for the mean distance to both references.

      Parameters
      ----------
      Gn : list
          Graphs to compare; indices 0 and 6 must exist.  The caller's list
          is left untouched because a new list is built locally.
      """
      ref_positions = [0, 6]
      first_ref = Gn[ref_positions[0]]  # looked up but not used further
      second_ref = Gn[ref_positions[1]]

      # Build the "median" candidate and show it.
      candidate = second_ref.copy()
      candidate.remove_node(0)
      nx.draw_networkx(candidate)
      plt.show()
      print(candidate.nodes(data=True))

      # The candidate takes index 0; the references move to index + 1.
      Gn = [candidate] + Gn
      shifted = [pos + 1 for pos in ref_positions]

      # Gram matrix, then the distance matrix derived from it.
      gram = compute_kernel(Gn, 'untilhpathkernel', True)
      dmatrix = gram2distances(gram)

      for row in shifted:
          print(np.sort(dmatrix[row]))
          print(np.argsort(dmatrix[row]))

      # For every g in Gn: (d(g1, g) + d(g2, g)) / 2.
      dis_median = [
          (dmatrix[i, shifted[0]] + dmatrix[i, shifted[1]]) / 2
          for i in range(len(Gn))
      ]
      print(np.sort(dis_median))
      print(np.argsort(dis_median))
  def test_who_is_the_closest_in_GED_space(Gn):
      """Print which graphs lie closest to two reference graphs by edit distance.

      Mirrors the kernel-space variant of this test, but fills the pairwise
      matrix with graph edit distances obtained from ``GED(..., lib='gedlib')``
      instead of kernel-induced distances.  A candidate "median" (copy of
      ``Gn[6]`` without node ``0``) is drawn, printed and prepended, so the
      reference graphs ``Gn[0]`` and ``Gn[6]`` sit at rows 1 and 7.

      Parameters
      ----------
      Gn : list
          Graphs to compare; indices 0 and 6 must exist.  The caller's list
          is left untouched because a new list is built locally.
      """
      ref_positions = [0, 6]
      first_ref = Gn[ref_positions[0]]  # looked up but not used further
      second_ref = Gn[ref_positions[1]]

      # Build the "median" candidate and show it.
      candidate = second_ref.copy()
      candidate.remove_node(0)
      nx.draw_networkx(candidate)
      plt.show()
      print(candidate.nodes(data=True))

      Gn = [candidate] + Gn
      shifted = [pos + 1 for pos in ref_positions]

      # Full pairwise GED matrix (both (i, j) and (j, i) are evaluated).
      nb_graphs = len(Gn)
      ged_matrix = np.zeros((nb_graphs, nb_graphs))
      for row in tqdm(range(nb_graphs), desc='computing GEDs', file=sys.stdout):
          for col in range(nb_graphs):
              distance, _, _ = GED(Gn[row], Gn[col], lib='gedlib')
              ged_matrix[row, col] = distance

      for row in shifted:
          print(np.sort(ged_matrix[row]))
          print(np.argsort(ged_matrix[row]))

      # For every g in Gn: (GED(g1, g) + GED(g2, g)) / 2.
      dis_median = [
          (ged_matrix[i, shifted[0]] + ged_matrix[i, shifted[1]]) / 2
          for i in range(nb_graphs)
      ]
      print(np.sort(dis_median))
      print(np.argsort(dis_median))
  def test_will_IAM_give_the_median_graph_we_wanted(Gn):
      """Run IAM on two dataset graphs and display the resulting median.

      Copies of ``Gn[0]`` and ``Gn[6]`` are used both as the set to summarise
      and as the initial candidates; all three edit costs are set to 1.  The
      result is drawn and its nodes and edges are printed.

      Parameters
      ----------
      Gn : list
          Dataset graphs; indices 0 and 6 must exist.  Only copies are handed
          to IAM, so the originals are not modified here.
      """
      # The helper was referenced without ever being imported in this file,
      # which raised NameError.  Import it locally, the same way
      # test_iam_letter_h imports its IAM helper.
      # NOTE(review): assumes the helper is defined in the ``iam`` module --
      # confirm.
      from iam import test_iam_with_more_graphs_as_init

      idx_gi = [0, 6]
      g1 = Gn[idx_gi[0]].copy()
      g2 = Gn[idx_gi[1]].copy()

      # Alternative kept from the original: run on the whole dataset with
      # test_iam_with_more_graphs_as_init(Gn, Gn, c_ei=1, c_er=1, c_es=1).
      g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2],
                                                   c_ei=1, c_er=1, c_es=1)

      nx.draw_networkx(g_median)
      plt.show()
      print(g_median.nodes(data=True))
      print(g_median.edges(data=True))
  def test_new_IAM_allGraph_deleteNodes(Gn):
      """Run the node-deleting IAM variant on two hand-built toy graphs.

      Both graphs share the same 5-node topology (path 0-1-2 with nodes 3 and
      4 attached to node 2) and differ only in the labels of nodes 3 and 4
      ('S' versus 'O').  Each input graph and the computed median are drawn
      and their nodes and edges printed.

      Parameters
      ----------
      Gn : list
          Unused by the current code; kept so existing callers still work.
      """
      # This helper was called without being imported anywhere in the file,
      # which raised NameError; test_iam_letter_h imports it from ``iam`` the
      # same way.
      from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations

      bonds = [(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
               (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})]

      g1 = nx.Graph(name='haha')
      g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
                         (3, {'atom': 'S'}), (4, {'atom': 'S'})])
      g1.add_edges_from(bonds)

      g2 = nx.Graph(name='hahaha')
      g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
                         (3, {'atom': 'O'}), (4, {'atom': 'O'})])
      g2.add_edges_from(bonds)

      for graph in (g1, g2):
          nx.draw_networkx(graph)
          plt.show()
          print(graph.nodes(data=True))
          print(graph.edges(data=True))

      # NOTE(review): test_iam_letter_h unpacks this helper's result as
      # (graphs, distance) when GED options are passed; confirm that a single
      # graph comes back for this call before relying on the drawing below.
      g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
          [g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)

      nx.draw_networkx(g_median)
      plt.show()
      print(g_median.nodes(data=True))
      print(g_median.edges(data=True))
  def test_the_simple_two(Gn, gkernel):
      """Search pre-images between ``Gn[0]`` and ``Gn[6]`` with GK-IAM.

      The kernel matrix is computed over copies of all graphs in ``Gn``
      followed by copies of the two reference graphs, so the references
      occupy the last two rows.  For each alpha the pair is weighted
      ``[alpha, 1 - alpha]``; the best distance is printed and every returned
      pre-image is drawn and printed.

      Parameters
      ----------
      Gn : list
          Dataset graphs; indices 0 and 6 must exist.
      gkernel : str
          Kernel name, forwarded to ``compute_kernel`` and
          ``gk_iam_nearest_multi``.
      """
      from gk_iam import gk_iam_nearest_multi

      max_recursions = 10
      nb_neighbors = 2  # k nearest neighbors
      alphas = np.linspace(0.5, 0.5, 1)

      # The seed is still set although the two graphs are now fixed
      # (the random pick was replaced by indices 0 and 6).
      np.random.seed(1)
      ref_a = Gn[0]
      ref_b = Gn[6]

      graphs_and_refs = [g.copy() for g in Gn] + [ref_a.copy(), ref_b.copy()]
      km = compute_kernel(graphs_and_refs, gkernel, True)

      outcomes = []
      for alpha in alphas:
          print('alpha =', alpha)
          dhat, ghat_list = gk_iam_nearest_multi(
              Gn, [ref_a, ref_b], [alpha, 1 - alpha],
              range(len(Gn), len(Gn) + 2), km,
              nb_neighbors, max_recursions, gkernel)
          outcomes.append((dhat, ghat_list))

      for alpha, (dhat, ghat_list) in zip(alphas, outcomes):
          print('when alpha is', alpha, 'the shortest distance is', dhat)
          print('the corresponding pre-images are')
          for pre_image in ghat_list:
              nx.draw_networkx(pre_image)
              plt.show()
              print(pre_image.nodes(data=True))
              print(pre_image.edges(data=True))
  def test_remove_bests(Gn, gkernel):
      """Run GK-IAM after removing the two reference graphs from the dataset.

      ``Gn[0]`` and ``Gn[6]`` are the graphs whose weighted mean is
      approximated.  They are excluded from the candidate set, so the search
      cannot simply return one of them, and are appended as the last two rows
      of the kernel matrix.

      Unlike the original implementation, the caller's list is no longer
      modified: the original deleted the two graphs from ``Gn`` in place.
      A filtered list with the same contents is built locally instead.

      Parameters
      ----------
      Gn : list
          Dataset graphs; indices 0 and 6 must exist.
      gkernel : str
          Kernel name, forwarded to ``compute_kernel`` and
          ``gk_iam_nearest_multi``.
      """
      from gk_iam import gk_iam_nearest_multi

      r_max = 10  # recursions
      alpha_range = np.linspace(0.5, 0.5, 1)
      k = 20  # k nearest neighbors

      # The seed is still set although the two graphs are fixed.
      np.random.seed(1)
      idx_gi = [0, 6]
      g1 = Gn[idx_gi[0]]
      g2 = Gn[idx_gi[1]]

      # Remove the best 2 graphs from a local list, not from the argument.
      Gn = [g for i, g in enumerate(Gn) if i not in idx_gi]

      Gn_mix = [g.copy() for g in Gn]
      Gn_mix.append(g1.copy())
      Gn_mix.append(g2.copy())

      km = compute_kernel(Gn_mix, gkernel, True)

      g_best = []
      dis_best = []
      for alpha in alpha_range:
          print('alpha =', alpha)
          dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
                                                 range(len(Gn), len(Gn) + 2), km,
                                                 k, r_max, gkernel)
          dis_best.append(dhat)
          g_best.append(ghat_list)

      for idx, item in enumerate(alpha_range):
          print('when alpha is', item, 'the shortest distance is', dis_best[idx])
          print('the corresponding pre-images are')
          for g in g_best[idx]:
              draw_Letter_graph(g)
              print(g.nodes(data=True))
              print(g.edges(data=True))
  214. ###############################################################################
  215. # Tests on dataset Letter-H.
  def test_gkiam_letter_h():
      """Compute a GK-IAM median for every letter class of Letter-high.

      For each class the kernel matrix is built over the class graphs plus a
      second copy of them (the copies are the rows passed as the median set),
      ``gk_iam_nearest_multi`` is run with uniform weights, the resulting
      pre-images are drawn into ``results/gk_iam/`` and their sum of
      distances in graph space is evaluated with ``ged_median``.  Summary
      lists are printed at the end.
      """
      from gk_iam import gk_iam_nearest_multi

      # Node labels are non-symbolic; 'Letter-med' can be swapped in here.
      ds = {'name': 'Letter-high',
            'dataset': '../datasets/Letter-high/Letter-high_A.txt',
            'extra_params': {}}
      Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

      gkernel = 'structuralspkernel'
      max_recursions = 3
      nb_neighbors = 10  # k nearest neighbors

      # Classify graphs according to letters.
      idx_dict = get_same_item_indices(y_all)

      time_list = []
      sod_ks_min_list = []
      sod_gs_list = []
      sod_gs_min_list = []
      nb_updated_list = []

      for letter in idx_dict:
          print('\n-------------------------------------------------------\n')
          Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
          Gn_mix = Gn_let + [g.copy() for g in Gn_let]
          nb_let = len(Gn_let)

          # A single alpha: the uniform weight 1 / |class|.
          alpha_range = np.linspace(1 / nb_let, 1 / nb_let, 1)

          started = time.time()
          km = compute_kernel(Gn_mix, gkernel, True)
          g_best = []
          dis_best = []
          for alpha in alpha_range:
              print('alpha =', alpha)
              dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(
                  Gn_let, Gn_let, [alpha] * nb_let,
                  range(nb_let, len(Gn_mix)), km,
                  nb_neighbors, max_recursions, gkernel,
                  c_ei=1.7, c_er=1.7, c_es=1.7,
                  ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
              dis_best.append(dhat)
              g_best.append(ghat_list)
          time_list.append(time.time() - started)

          # Show best graphs and save them to file.
          for alpha, dhat, pre_images in zip(alpha_range, dis_best, g_best):
              print('when alpha is', alpha, 'the shortest distance is', dhat)
              print('the corresponding pre-images are')
              for g in pre_images:
                  draw_Letter_graph(g, savepath='results/gk_iam/')
                  print(g.nodes(data=True))
                  print(g.edges(data=True))

          # Corresponding SOD in graph space (alpha range not considered).
          sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER',
                                  ged_method='IPFP', saveGXL='gedlib-letter')
          sod_gs_list.append(sod_tmp)
          sod_gs_min_list.append(np.min(sod_tmp))
          sod_ks_min_list.append(sod_ks)
          nb_updated_list.append(nb_updated)

      print('\nsods in graph space: ', sod_gs_list)
      print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)
      print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
      print('\nnumber of updates for each letter: ', nb_updated_list)
      print('\ntimes:', time_list)
  277. #def compute_letter_median_by_average(Gn):
  278. # return g_median
  def test_iam_letter_h():
      """Compute a plain-IAM median for every letter class of Letter-high.

      For each class the node-deleting IAM variant is run with the class
      graphs as both the set to summarise and the initial candidates.  The
      returned graphs are drawn into ``results/iam/``; their sum of distances
      in kernel space is then evaluated with ``dis_gstar`` on a kernel matrix
      built over the returned graphs followed by the class graphs.  Summary
      lists are printed at the end.
      """
      from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations

      # Node labels are non-symbolic; 'Letter-med' can be swapped in here.
      ds = {'name': 'Letter-high',
            'dataset': '../datasets/Letter-high/Letter-high_A.txt',
            'extra_params': {}}
      Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

      # Classify graphs according to letters.
      idx_dict = get_same_item_indices(y_all)

      time_list = []
      sod_list = []
      sod_min_list = []

      for letter in idx_dict:
          Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
          nb_let = len(Gn_let)

          # A single alpha: the uniform weight 1 / |class|.
          alpha_range = np.linspace(1 / nb_let, 1 / nb_let, 1)

          g_best = []
          dis_best = []
          started = time.time()
          for alpha in alpha_range:
              print('alpha =', alpha)
              ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
                  Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7,
                  ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
              dis_best.append(dhat)
              g_best.append(ghat_list)
          time_list.append(time.time() - started)

          # Show best graphs and save them to file.
          for alpha, dhat, pre_images in zip(alpha_range, dis_best, g_best):
              print('when alpha is', alpha, 'the shortest distance is', dhat)
              print('the corresponding pre-images are')
              for g in pre_images:
                  draw_Letter_graph(g, savepath='results/iam/')
                  print(g.nodes(data=True))
                  print(g.edges(data=True))

          # Corresponding SOD in kernel space (alpha range not considered).
          gkernel = 'structuralspkernel'
          Gn_mix = g_best[0] + Gn_let
          km = compute_kernel(Gn_mix, gkernel, True)
          sod_tmp = [
              dis_gstar(ig, range(len(g_best[0]), len(Gn_mix)),
                        [alpha_range[0]] * nb_let, km, withterm3=False)
              for ig, _ in tqdm(enumerate(g_best[0]),
                                desc='computing kernel sod', file=sys.stdout)
          ]
          sod_list.append(sod_tmp)
          sod_min_list.append(np.min(sod_tmp))

      print('\nsods in kernel space: ', sod_list)
      print('\nsmallest sod in kernel space for each letter: ', sod_min_list)
      print('\ntimes:', time_list)
  def test_random_preimage_letter_h():
      """Compute a random-search pre-image for every letter class of Letter-high.

      Same protocol as the GK-IAM letter test, but the search is delegated to
      ``preimage_random`` and ``ged_median`` is called with its default GED
      settings.  Pre-images are drawn into ``results/gk_iam/`` and summary
      lists are printed at the end.
      """
      from preimage_random import preimage_random

      # Node labels are non-symbolic.  Other datasets tried with this test:
      # Letter-med, MUTAG, monoterpenoides (trainset_9.ds) and acyclic
      # (dataset_bps.ds).
      ds = {'name': 'Letter-high',
            'dataset': '../datasets/Letter-high/Letter-high_A.txt',
            'extra_params': {}}
      Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

      gkernel = 'structuralspkernel'
      max_recursions = 3  # previously 10
      nb_neighbors = 10  # k nearest neighbors, previously 5

      # Classify graphs according to letters.
      idx_dict = get_same_item_indices(y_all)

      time_list = []
      sod_list = []
      sod_min_list = []

      for letter in idx_dict:
          print('\n-------------------------------------------------------\n')
          Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
          Gn_mix = Gn_let + [g.copy() for g in Gn_let]
          nb_let = len(Gn_let)

          # A single alpha: the uniform weight 1 / |class|.
          alpha_range = np.linspace(1 / nb_let, 1 / nb_let, 1)

          started = time.time()
          km = compute_kernel(Gn_mix, gkernel, True)
          g_best = []
          dis_best = []
          for alpha in alpha_range:
              print('alpha =', alpha)
              dhat, ghat_list = preimage_random(
                  Gn_let, Gn_let, [alpha] * nb_let,
                  range(nb_let, len(Gn_mix)), km,
                  nb_neighbors, max_recursions, gkernel,
                  c_ei=1.7, c_er=1.7, c_es=1.7)
              dis_best.append(dhat)
              g_best.append(ghat_list)
          time_list.append(time.time() - started)

          # Show best graphs and save them to file.
          for alpha, dhat, pre_images in zip(alpha_range, dis_best, g_best):
              print('when alpha is', alpha, 'the shortest distance is', dhat)
              print('the corresponding pre-images are')
              for g in pre_images:
                  draw_Letter_graph(g, savepath='results/gk_iam/')
                  print(g.nodes(data=True))
                  print(g.edges(data=True))

          # Corresponding SOD in graph space (alpha range not considered).
          sod_tmp, _ = ged_median(g_best[0], Gn_let)
          sod_list.append(sod_tmp)
          sod_min_list.append(np.min(sod_tmp))

      print('\nsods in graph space: ', sod_list)
      print('\nsmallest sod in graph space for each letter: ', sod_min_list)
      print('\ntimes:', time_list)
  def test_gkiam_mutag():
      """Compute a GK-IAM median per class, using default GED settings.

      NOTE(review): despite the name, the dataset loaded here is Letter-high,
      not MUTAG, and the pre-images are drawn with ``draw_Letter_graph`` --
      confirm whether this is intentional.

      For each class the kernel matrix is built over the class graphs plus a
      second copy of them, ``gk_iam_nearest_multi`` is run with uniform
      weights and 20 nearest neighbors, the pre-images are drawn into
      ``results/gk_iam/`` and their sum of distances in graph space is
      evaluated with ``ged_median``.  Summary lists are printed at the end.
      """
      from gk_iam import gk_iam_nearest_multi

      # Node labels are non-symbolic; 'Letter-med' can be swapped in here.
      ds = {'name': 'Letter-high',
            'dataset': '../datasets/Letter-high/Letter-high_A.txt',
            'extra_params': {}}
      Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

      gkernel = 'structuralspkernel'
      max_recursions = 3
      nb_neighbors = 20  # k nearest neighbors

      # Classify graphs according to letters.
      idx_dict = get_same_item_indices(y_all)

      time_list = []
      sod_ks_min_list = []
      sod_gs_list = []
      sod_gs_min_list = []
      nb_updated_list = []

      for letter in idx_dict:
          print('\n-------------------------------------------------------\n')
          Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
          Gn_mix = Gn_let + [g.copy() for g in Gn_let]
          nb_let = len(Gn_let)

          # A single alpha: the uniform weight 1 / |class|.
          alpha_range = np.linspace(1 / nb_let, 1 / nb_let, 1)

          started = time.time()
          km = compute_kernel(Gn_mix, gkernel, True)
          g_best = []
          dis_best = []
          for alpha in alpha_range:
              print('alpha =', alpha)
              dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(
                  Gn_let, Gn_let, [alpha] * nb_let,
                  range(nb_let, len(Gn_mix)), km,
                  nb_neighbors, max_recursions, gkernel,
                  c_ei=1.7, c_er=1.7, c_es=1.7)
              dis_best.append(dhat)
              g_best.append(ghat_list)
          time_list.append(time.time() - started)

          # Show best graphs and save them to file.
          for alpha, dhat, pre_images in zip(alpha_range, dis_best, g_best):
              print('when alpha is', alpha, 'the shortest distance is', dhat)
              print('the corresponding pre-images are')
              for g in pre_images:
                  draw_Letter_graph(g, savepath='results/gk_iam/')
                  print(g.nodes(data=True))
                  print(g.edges(data=True))

          # Corresponding SOD in graph space (alpha range not considered).
          sod_tmp, _ = ged_median(g_best[0], Gn_let)
          sod_gs_list.append(sod_tmp)
          sod_gs_min_list.append(np.min(sod_tmp))
          sod_ks_min_list.append(sod_ks)
          nb_updated_list.append(nb_updated)

      print('\nsods in graph space: ', sod_gs_list)
      print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)
      print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
      print('\nnumber of updates for each letter: ', nb_updated_list)
      print('\ntimes:', time_list)
  455. ###############################################################################
  456. # Re-test.
  def retest_the_simple_two():
      """Re-run the two-graph GK-IAM experiment on hand-built toy graphs.

      Two 5-node graphs with identical topology (path 0-1-2 with nodes 3 and
      4 attached to node 2) are built; they differ only in the labels of
      nodes 3 and 4 ('S' versus 'O').  Copies of them, passed through
      ``remove_edges``, form the candidate set, while the untouched originals
      are the graphs whose weighted mean is approximated.  For each alpha the
      first returned pre-image is drawn, saved under ``results/gk_iam/`` and
      printed, and its sum of distances in graph space is evaluated with
      ``ged_median``.

      Fix over the original: the graph-space evaluation now uses the
      pre-images of the alpha being processed (``g_best[idx]``).  The
      original always evaluated ``g_best[0]``, which gives the same output
      only while ``alpha_range`` holds a single value.
      """
      from gk_iam import gk_iam_nearest_multi

      # The two simple graphs.
      bonds = [(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
               (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})]
      g1 = nx.Graph(name='haha')
      g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
                         (3, {'atom': 'S'}), (4, {'atom': 'S'})])
      g1.add_edges_from(bonds)
      g2 = nx.Graph(name='hahaha')
      g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
                         (3, {'atom': 'O'}), (4, {'atom': 'O'})])
      g2.add_edges_from(bonds)

      # Candidate set: copies handed to remove_edges; g1 and g2 themselves are
      # not passed to it.
      Gn = [g1.copy(), g2.copy()]
      remove_edges(Gn)

      gkernel = 'marginalizedkernel'
      r_max = 10  # recursions
      alpha_range = np.linspace(0.5, 0.5, 1)
      k = 2  # k nearest neighbors
      epsilon = 1e-6
      ged_cost = 'CHEM_1'
      ged_method = 'IPFP'
      saveGXL = 'gedlib'
      c_ei = 1
      c_er = 1
      c_es = 1

      # The references occupy the last two rows of the kernel matrix.
      Gn_mix = Gn + [g1.copy(), g2.copy()]

      time0 = time.time()
      km = compute_kernel(Gn_mix, gkernel, True)
      time_km = time.time() - time0

      time_list = []
      sod_ks_min_list = []
      sod_gs_list = []
      sod_gs_min_list = []
      nb_updated_list = []
      g_best = []

      for alpha in alpha_range:
          print('\n-------------------------------------------------------\n')
          print('alpha =', alpha)
          time0 = time.time()
          dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(
              Gn, [g1, g2], [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2),
              km, k, r_max, gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es,
              epsilon=epsilon, ged_cost=ged_cost, ged_method=ged_method,
              saveGXL=saveGXL)
          # Reported time includes the one-off kernel-matrix computation.
          time_total = time.time() - time0 + time_km
          print('time: ', time_total)
          time_list.append(time_total)
          sod_ks_min_list.append(dhat)
          g_best.append(ghat_list)
          nb_updated_list.append(nb_updated)

      # Show best graphs and save them to file.
      for idx, item in enumerate(alpha_range):
          print('when alpha is', item, 'the shortest distance is', sod_ks_min_list[idx])
          print('one of the possible corresponding pre-images is')
          best = g_best[idx][0]
          nx.draw(best, labels=nx.get_node_attributes(best, 'atom'),
                  with_labels=True)
          plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
          plt.show()
          print(best.nodes(data=True))
          print(best.edges(data=True))

      # Corresponding SOD in graph space, one entry per alpha.
      for idx, _ in enumerate(alpha_range):
          sod_tmp, _ = ged_median(g_best[idx], [g1, g2], ged_cost=ged_cost,
                                  ged_method=ged_method, saveGXL=saveGXL)
          sod_gs_list.append(sod_tmp)
          sod_gs_min_list.append(np.min(sod_tmp))

      print('\nsods in graph space: ', sod_gs_list)
      print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
      print('\nsmallest sod in kernel space for each alpha: ', sod_ks_min_list)
      print('\nnumber of updates for each alpha: ', nb_updated_list)
      print('\ntimes:', time_list)
  if __name__ == "__main__":
      # Dataset loading for the tests that take ``Gn`` (currently disabled).
      # Pick one ``ds`` definition, then load and truncate:
      # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
      #       'extra_params': {}}  # node/edge symb
      # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
      #       'extra_params': {}}  # node nsymb
      # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
      #       'extra_params': {}}
      # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
      #       'extra_params': {}}  # node symb
      # Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
      # Gn = Gn[0:20]

      # Scratch check of attribute-aware isomorphism (disabled):
      # import networkx.algorithms.isomorphism as iso
      # G1 = nx.MultiDiGraph()
      # G2 = nx.MultiDiGraph()
      # G1.add_nodes_from([1,2,3], fill='red')
      # G2.add_nodes_from([10,20,30,40], fill='red')
      # nx.add_path(G1, [1,2,3,4], weight=3, linewidth=2.5)
      # nx.add_path(G2, [10,20,30,40], weight=3)
      # nm = iso.categorical_node_match('fill', 'red')
      # print(nx.is_isomorphic(G1, G2, node_match=nm))

      # Available tests; uncomment the one to run.
      # test_new_IAM_allGraph_deleteNodes(Gn)
      # test_will_IAM_give_the_median_graph_we_wanted(Gn)
      # test_who_is_the_closest_in_GED_space(Gn)
      # test_who_is_the_closest_in_kernel_space(Gn)
      # test_the_simple_two(Gn, 'untilhpathkernel')
      # test_remove_bests(Gn, 'untilhpathkernel')
      # test_gkiam_letter_h()
      # test_iam_letter_h()
      # test_random_preimage_letter_h()

      ###########################################################################
      # Retests.
      retest_the_simple_two()

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.