You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

run_gk_iam.py 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Jul 4 12:20:16 2019
  5. @author: ljia
  6. """
  7. import numpy as np
  8. import networkx as nx
  9. import matplotlib.pyplot as plt
  10. import time
  11. from tqdm import tqdm
  12. import sys
  13. sys.path.insert(0, "../")
  14. from pygraph.utils.graphfiles import loadDataset
  15. from median import draw_Letter_graph
  16. # --------------------------- These are tests --------------------------------#
  17. def test_who_is_the_closest_in_kernel_space(Gn):
  18. idx_gi = [0, 6]
  19. g1 = Gn[idx_gi[0]]
  20. g2 = Gn[idx_gi[1]]
  21. # create the "median" graph.
  22. gnew = g2.copy()
  23. gnew.remove_node(0)
  24. nx.draw_networkx(gnew)
  25. plt.show()
  26. print(gnew.nodes(data=True))
  27. Gn = [gnew] + Gn
  28. # compute gram matrix
  29. Kmatrix = compute_kernel(Gn, 'untilhpathkernel', True)
  30. # the distance matrix
  31. dmatrix = gram2distances(Kmatrix)
  32. print(np.sort(dmatrix[idx_gi[0] + 1]))
  33. print(np.argsort(dmatrix[idx_gi[0] + 1]))
  34. print(np.sort(dmatrix[idx_gi[1] + 1]))
  35. print(np.argsort(dmatrix[idx_gi[1] + 1]))
  36. # for all g in Gn, compute (d(g1, g) + d(g2, g)) / 2
  37. dis_median = [(dmatrix[i, idx_gi[0] + 1] + dmatrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
  38. print(np.sort(dis_median))
  39. print(np.argsort(dis_median))
  40. return
  41. def test_who_is_the_closest_in_GED_space(Gn):
  42. from iam import GED
  43. idx_gi = [0, 6]
  44. g1 = Gn[idx_gi[0]]
  45. g2 = Gn[idx_gi[1]]
  46. # create the "median" graph.
  47. gnew = g2.copy()
  48. gnew.remove_node(0)
  49. nx.draw_networkx(gnew)
  50. plt.show()
  51. print(gnew.nodes(data=True))
  52. Gn = [gnew] + Gn
  53. # compute GEDs
  54. ged_matrix = np.zeros((len(Gn), len(Gn)))
  55. for i1 in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
  56. for i2 in range(len(Gn)):
  57. dis, _, _ = GED(Gn[i1], Gn[i2], lib='gedlib')
  58. ged_matrix[i1, i2] = dis
  59. print(np.sort(ged_matrix[idx_gi[0] + 1]))
  60. print(np.argsort(ged_matrix[idx_gi[0] + 1]))
  61. print(np.sort(ged_matrix[idx_gi[1] + 1]))
  62. print(np.argsort(ged_matrix[idx_gi[1] + 1]))
  63. # for all g in Gn, compute (GED(g1, g) + GED(g2, g)) / 2
  64. dis_median = [(ged_matrix[i, idx_gi[0] + 1] + ged_matrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
  65. print(np.sort(dis_median))
  66. print(np.argsort(dis_median))
  67. return
  68. def test_will_IAM_give_the_median_graph_we_wanted(Gn):
  69. idx_gi = [0, 6]
  70. g1 = Gn[idx_gi[0]].copy()
  71. g2 = Gn[idx_gi[1]].copy()
  72. # del Gn[idx_gi[0]]
  73. # del Gn[idx_gi[1] - 1]
  74. g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
  75. # g_median = test_iam_with_more_graphs_as_init(Gn, Gn, c_ei=1, c_er=1, c_es=1)
  76. nx.draw_networkx(g_median)
  77. plt.show()
  78. print(g_median.nodes(data=True))
  79. print(g_median.edges(data=True))
  80. def test_new_IAM_allGraph_deleteNodes(Gn):
  81. idx_gi = [0, 6]
  82. # g1 = Gn[idx_gi[0]].copy()
  83. # g2 = Gn[idx_gi[1]].copy()
  84. # g1 = nx.Graph(name='haha')
  85. # g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
  86. # g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
  87. # g2 = nx.Graph(name='hahaha')
  88. # g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
  89. # (3, {'atom': 'O'}), (4, {'atom': 'C'})])
  90. # g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
  91. # (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
  92. g1 = nx.Graph(name='haha')
  93. g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
  94. (3, {'atom': 'S'}), (4, {'atom': 'S'})])
  95. g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
  96. (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
  97. g2 = nx.Graph(name='hahaha')
  98. g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
  99. (3, {'atom': 'O'}), (4, {'atom': 'O'})])
  100. g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
  101. (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
  102. # g2 = g1.copy()
  103. # g2.add_nodes_from([(3, {'atom': 'O'})])
  104. # g2.add_nodes_from([(4, {'atom': 'C'})])
  105. # g2.add_edges_from([(1, 3, {'bond_type': '1'})])
  106. # g2.add_edges_from([(3, 4, {'bond_type': '1'})])
  107. # del Gn[idx_gi[0]]
  108. # del Gn[idx_gi[1] - 1]
  109. nx.draw_networkx(g1)
  110. plt.show()
  111. print(g1.nodes(data=True))
  112. print(g1.edges(data=True))
  113. nx.draw_networkx(g2)
  114. plt.show()
  115. print(g2.nodes(data=True))
  116. print(g2.edges(data=True))
  117. g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
  118. # g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(Gn, Gn, c_ei=1, c_er=1, c_es=1)
  119. nx.draw_networkx(g_median)
  120. plt.show()
  121. print(g_median.nodes(data=True))
  122. print(g_median.edges(data=True))
  123. def test_the_simple_two(Gn, gkernel):
  124. from gk_iam import gk_iam_nearest_multi, compute_kernel
  125. lmbda = 0.03 # termination probalility
  126. r_max = 10 # recursions
  127. l = 500
  128. alpha_range = np.linspace(0.5, 0.5, 1)
  129. k = 2 # k nearest neighbors
  130. # randomly select two molecules
  131. np.random.seed(1)
  132. idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
  133. g1 = Gn[idx_gi[0]]
  134. g2 = Gn[idx_gi[1]]
  135. Gn_mix = [g.copy() for g in Gn]
  136. Gn_mix.append(g1.copy())
  137. Gn_mix.append(g2.copy())
  138. # g_tmp = iam([g1, g2])
  139. # nx.draw_networkx(g_tmp)
  140. # plt.show()
  141. # compute
  142. # k_list = [] # kernel between each graph and itself.
  143. # k_g1_list = [] # kernel between each graph and g1
  144. # k_g2_list = [] # kernel between each graph and g2
  145. # for ig, g in tqdm(enumerate(Gn), desc='computing self kernels', file=sys.stdout):
  146. # ktemp = compute_kernel([g, g1, g2], 'marginalizedkernel', False)
  147. # k_list.append(ktemp[0][0, 0])
  148. # k_g1_list.append(ktemp[0][0, 1])
  149. # k_g2_list.append(ktemp[0][0, 2])
  150. km = compute_kernel(Gn_mix, gkernel, True)
  151. # k_list = np.diag(km) # kernel between each graph and itself.
  152. # k_g1_list = km[idx_gi[0]] # kernel between each graph and g1
  153. # k_g2_list = km[idx_gi[1]] # kernel between each graph and g2
  154. g_best = []
  155. dis_best = []
  156. # for each alpha
  157. for alpha in alpha_range:
  158. print('alpha =', alpha)
  159. dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
  160. range(len(Gn), len(Gn) + 2), km,
  161. k, r_max,gkernel)
  162. dis_best.append(dhat)
  163. g_best.append(ghat_list)
  164. for idx, item in enumerate(alpha_range):
  165. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  166. print('the corresponding pre-images are')
  167. for g in g_best[idx]:
  168. nx.draw_networkx(g)
  169. plt.show()
  170. print(g.nodes(data=True))
  171. print(g.edges(data=True))
  172. def test_remove_bests(Gn, gkernel):
  173. from gk_iam import gk_iam_nearest_multi, compute_kernel
  174. lmbda = 0.03 # termination probalility
  175. r_max = 10 # recursions
  176. l = 500
  177. alpha_range = np.linspace(0.5, 0.5, 1)
  178. k = 20 # k nearest neighbors
  179. # randomly select two molecules
  180. np.random.seed(1)
  181. idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
  182. g1 = Gn[idx_gi[0]]
  183. g2 = Gn[idx_gi[1]]
  184. # remove the best 2 graphs.
  185. del Gn[idx_gi[0]]
  186. del Gn[idx_gi[1] - 1]
  187. # del Gn[8]
  188. Gn_mix = [g.copy() for g in Gn]
  189. Gn_mix.append(g1.copy())
  190. Gn_mix.append(g2.copy())
  191. # compute
  192. km = compute_kernel(Gn_mix, gkernel, True)
  193. g_best = []
  194. dis_best = []
  195. # for each alpha
  196. for alpha in alpha_range:
  197. print('alpha =', alpha)
  198. dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha],
  199. range(len(Gn), len(Gn) + 2), km,
  200. k, r_max, gkernel)
  201. dis_best.append(dhat)
  202. g_best.append(ghat_list)
  203. for idx, item in enumerate(alpha_range):
  204. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  205. print('the corresponding pre-images are')
  206. for g in g_best[idx]:
  207. draw_Letter_graph(g)
  208. # nx.draw_networkx(g)
  209. # plt.show()
  210. print(g.nodes(data=True))
  211. print(g.edges(data=True))
  212. def test_gkiam_letter_h():
  213. from gk_iam import gk_iam_nearest_multi, compute_kernel
  214. from iam import median_distance
  215. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  216. 'extra_params': {}} # node nsymb
  217. # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
  218. # 'extra_params': {}} # node nsymb
  219. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  220. gkernel = 'structuralspkernel'
  221. lmbda = 0.03 # termination probalility
  222. r_max = 3 # recursions
  223. # alpha_range = np.linspace(0.5, 0.5, 1)
  224. k = 10 # k nearest neighbors
  225. # classify graphs according to letters.
  226. idx_dict = get_same_item_indices(y_all)
  227. time_list = []
  228. sod_list = []
  229. sod_min_list = []
  230. for letter in idx_dict:
  231. print('\n-------------------------------------------------------\n')
  232. Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
  233. Gn_mix = Gn_let + [g.copy() for g in Gn_let]
  234. alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
  235. # compute
  236. time0 = time.time()
  237. km = compute_kernel(Gn_mix, gkernel, True)
  238. g_best = []
  239. dis_best = []
  240. # for each alpha
  241. for alpha in alpha_range:
  242. print('alpha =', alpha)
  243. dhat, ghat_list = gk_iam_nearest_multi(Gn_let, Gn_let, [alpha] * len(Gn_let),
  244. range(len(Gn_let), len(Gn_mix)), km,
  245. k, r_max, gkernel, c_ei=1.7,
  246. c_er=1.7, c_es=1.7)
  247. dis_best.append(dhat)
  248. g_best.append(ghat_list)
  249. time_list.append(time.time() - time0)
  250. # show best graphs and save them to file.
  251. for idx, item in enumerate(alpha_range):
  252. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  253. print('the corresponding pre-images are')
  254. for g in g_best[idx]:
  255. draw_Letter_graph(g, savepath='results/gk_iam/')
  256. # nx.draw_networkx(g)
  257. # plt.show()
  258. print(g.nodes(data=True))
  259. print(g.edges(data=True))
  260. # compute the corresponding sod in graph space. (alpha range not considered.)
  261. sod_tmp, _ = median_distance(g_best[0], Gn_let)
  262. sod_list.append(sod_tmp)
  263. sod_min_list.append(np.min(sod_tmp))
  264. print('\nsods in graph space: ', sod_list)
  265. print('\nsmallest sod in graph space for each letter: ', sod_min_list)
  266. print('\ntimes:', time_list)
  267. def get_same_item_indices(ls):
  268. """Get the indices of the same items in a list. Return a dict keyed by items.
  269. """
  270. idx_dict = {}
  271. for idx, item in enumerate(ls):
  272. if item in idx_dict:
  273. idx_dict[item].append(idx)
  274. else:
  275. idx_dict[item] = [idx]
  276. return idx_dict
  277. #def compute_letter_median_by_average(Gn):
  278. # return g_median
  279. def test_iam_letter_h():
  280. from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
  281. from gk_iam import dis_gstar, compute_kernel
  282. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  283. 'extra_params': {}} # node nsymb
  284. # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
  285. # 'extra_params': {}} # node nsymb
  286. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  287. lmbda = 0.03 # termination probalility
  288. # alpha_range = np.linspace(0.5, 0.5, 1)
  289. # classify graphs according to letters.
  290. idx_dict = get_same_item_indices(y_all)
  291. time_list = []
  292. sod_list = []
  293. sod_min_list = []
  294. for letter in idx_dict:
  295. Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
  296. alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
  297. # compute
  298. g_best = []
  299. dis_best = []
  300. time0 = time.time()
  301. # for each alpha
  302. for alpha in alpha_range:
  303. print('alpha =', alpha)
  304. ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
  305. Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7)
  306. dis_best.append(dhat)
  307. g_best.append(ghat_list)
  308. time_list.append(time.time() - time0)
  309. # show best graphs and save them to file.
  310. for idx, item in enumerate(alpha_range):
  311. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  312. print('the corresponding pre-images are')
  313. for g in g_best[idx]:
  314. draw_Letter_graph(g, savepath='results/iam/')
  315. # nx.draw_networkx(g)
  316. # plt.show()
  317. print(g.nodes(data=True))
  318. print(g.edges(data=True))
  319. # compute the corresponding sod in kernel space. (alpha range not considered.)
  320. gkernel = 'structuralspkernel'
  321. sod_tmp = []
  322. Gn_mix = g_best[0] + Gn_let
  323. km = compute_kernel(Gn_mix, gkernel, True)
  324. for ig, g in tqdm(enumerate(g_best[0]), desc='computing kernel sod', file=sys.stdout):
  325. dtemp = dis_gstar(ig, range(len(g_best[0]), len(Gn_mix)),
  326. [alpha_range[0]] * len(Gn_let), km, withterm3=False)
  327. sod_tmp.append(dtemp)
  328. sod_list.append(sod_tmp)
  329. sod_min_list.append(np.min(sod_tmp))
  330. print('\nsods in kernel space: ', sod_list)
  331. print('\nsmallest sod in kernel space for each letter: ', sod_min_list)
  332. print('\ntimes:', time_list)
  333. if __name__ == '__main__':
  334. # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  335. # 'extra_params': {}} # node/edge symb
  336. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  337. 'extra_params': {}} # node nsymb
  338. # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
  339. # 'extra_params': {}}
  340. # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
  341. # 'extra_params': {}} # node symb
  342. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  343. # Gn = Gn[0:20]
  344. # import networkx.algorithms.isomorphism as iso
  345. # G1 = nx.MultiDiGraph()
  346. # G2 = nx.MultiDiGraph()
  347. # G1.add_nodes_from([1,2,3], fill='red')
  348. # G2.add_nodes_from([10,20,30,40], fill='red')
  349. # nx.add_path(G1, [1,2,3,4], weight=3, linewidth=2.5)
  350. # nx.add_path(G2, [10,20,30,40], weight=3)
  351. # nm = iso.categorical_node_match('fill', 'red')
  352. # print(nx.is_isomorphic(G1, G2, node_match=nm))
  353. #
  354. # test_new_IAM_allGraph_deleteNodes(Gn)
  355. # test_will_IAM_give_the_median_graph_we_wanted(Gn)
  356. # test_who_is_the_closest_in_GED_space(Gn)
  357. # test_who_is_the_closest_in_kernel_space(Gn)
  358. # test_the_simple_two(Gn, 'untilhpathkernel')
  359. # test_remove_bests(Gn, 'untilhpathkernel')
  360. test_gkiam_letter_h()
  361. # test_iam_letter_h()

A Python package for graph kernels, graph edit distances and graph pre-image problem.