You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

gk_iam.py 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Apr 30 17:07:43 2019
  5. A graph pre-image method combining iterative pre-image method in reference [1]
  6. and the iterative alternate minimizations (IAM) in reference [2].
  7. @author: ljia
  8. @references:
  9. [1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to find graph
  10. pre-images. In Joint Pattern Recognition Symposium, pages 253-261. Springer, 2004.
  11. [2] Generalized median graph via iterative alternate minimization.
  12. """
  13. import sys
  14. import numpy as np
  15. import multiprocessing
  16. from tqdm import tqdm
  17. import networkx as nx
  18. import matplotlib.pyplot as plt
  19. from iam import iam, test_iam_with_more_graphs_as_init, test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
  20. sys.path.insert(0, "../")
  21. from pygraph.kernels.marginalizedKernel import marginalizedkernel
  22. from pygraph.kernels.untilHPathKernel import untilhpathkernel
  23. def gk_iam(Gn, alpha):
  24. """This function constructs graph pre-image by the iterative pre-image
  25. framework in reference [1], algorithm 1, where the step of generating new
  26. graphs randomly is replaced by the IAM algorithm in reference [2].
  27. notes
  28. -----
  29. Every time a better graph is acquired, the older one is replaced by it.
  30. """
  31. pass
  32. # # compute k nearest neighbors of phi in DN.
  33. # dis_list = [] # distance between g_star and each graph.
  34. # for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  35. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  36. # k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
  37. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  38. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  39. # dis_list.append(dtemp)
  40. #
  41. # # sort
  42. # sort_idx = np.argsort(dis_list)
  43. # dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
  44. # g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  45. # if dis_gs[0] == 0: # the exact pre-image.
  46. # print('The exact pre-image is found from the input dataset.')
  47. # return 0, g0hat
  48. # dhat = dis_gs[0] # the nearest distance
  49. # Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
  50. # gihat_list = []
  51. #
  52. ## i = 1
  53. # r = 1
  54. # while r < r_max:
  55. # print('r =', r)
  56. ## found = False
  57. # Gs_nearest = Gk + gihat_list
  58. # g_tmp = iam(Gs_nearest)
  59. #
  60. # # compute distance between phi and the new generated graph.
  61. # knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
  62. # p_quit=lmbda, n_iteration=20, remove_totters=False,
  63. # n_jobs=multiprocessing.cpu_count(), verbose=False)
  64. # dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
  65. # knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
  66. # (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  67. # k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  68. # if dnew <= dhat: # the new distance is smaller
  69. # print('I am smaller!')
  70. # dhat = dnew
  71. # g_new = g_tmp.copy() # found better graph.
  72. # gihat_list = [g_new]
  73. # dis_gs.append(dhat)
  74. # r = 0
  75. # else:
  76. # r += 1
  77. #
  78. # ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
  79. #
  80. # return dhat, ghat
  81. def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
  82. """This function constructs graph pre-image by the iterative pre-image
  83. framework in reference [1], algorithm 1, where the step of generating new
  84. graphs randomly is replaced by the IAM algorithm in reference [2].
  85. notes
  86. -----
  87. Every time a better graph is acquired, its distance in kernel space is
  88. compared with the k nearest ones, and the k nearest distances from the k+1
  89. distances will be used as the new ones.
  90. """
  91. # compute k nearest neighbors of phi in DN.
  92. dis_list = [] # distance between g_star and each graph.
  93. for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
  94. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
  95. # dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  96. # k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha *
  97. # (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha *
  98. # k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
  99. dis_list.append(dtemp)
  100. # sort
  101. sort_idx = np.argsort(dis_list)
  102. dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  103. g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
  104. if dis_gs[0] == 0: # the exact pre-image.
  105. print('The exact pre-image is found from the input dataset.')
  106. return 0, g0hat
  107. dhat = dis_gs[0] # the nearest distance
  108. ghat = g0hat.copy()
  109. Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  110. for gi in Gk:
  111. nx.draw_networkx(gi)
  112. plt.show()
  113. Gs_nearest = Gk.copy()
  114. # gihat_list = []
  115. # i = 1
  116. r = 1
  117. while r < r_max:
  118. print('r =', r)
  119. # found = False
  120. # Gs_nearest = Gk + gihat_list
  121. # g_tmp = iam(Gs_nearest)
  122. g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1)
  123. nx.draw_networkx(g_tmp)
  124. plt.show()
  125. # compute distance between phi and the new generated graph.
  126. gi_list = [Gn[i] for i in idx_gi]
  127. knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
  128. dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
  129. # dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] *
  130. # knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] *
  131. # alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] *
  132. # k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
  133. if dnew <= dhat and g_tmp != ghat: # the new distance is smaller
  134. print('I am smaller!')
  135. print(str(dhat) + '->' + str(dnew))
  136. # nx.draw_networkx(ghat)
  137. # plt.show()
  138. # print('->')
  139. # nx.draw_networkx(g_tmp)
  140. # plt.show()
  141. dhat = dnew
  142. g_new = g_tmp.copy() # found better graph.
  143. ghat = g_tmp.copy()
  144. dis_gs.append(dhat) # add the new nearest distance.
  145. Gs_nearest.append(g_new) # add the corresponding graph.
  146. sort_idx = np.argsort(dis_gs)
  147. dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
  148. Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
  149. r = 0
  150. else:
  151. r += 1
  152. return dhat, ghat
  153. def dis_gstar(idx_g, idx_gi, alpha, Kmatrix):
  154. term1 = Kmatrix[idx_g, idx_g]
  155. term2 = 0
  156. for i, a in enumerate(alpha):
  157. term2 += a * Kmatrix[idx_g, idx_gi[i]]
  158. term2 *= 2
  159. term3 = 0
  160. for i1, a1 in enumerate(alpha):
  161. for i2, a2 in enumerate(alpha):
  162. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  163. return np.sqrt(term1 - term2 + term3)
  164. def compute_kernel(Gn, graph_kernel, verbose):
  165. if graph_kernel == 'marginalizedkernel':
  166. Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
  167. p_quit=0.3, n_iteration=19, remove_totters=False,
  168. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  169. elif graph_kernel == 'untilhpathkernel':
  170. Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label='bond_type',
  171. depth=2, k_func='MinMax', compute_method='trie',
  172. n_jobs=multiprocessing.cpu_count(), verbose=verbose)
  173. # normalization
  174. Kmatrix_diag = Kmatrix.diagonal().copy()
  175. for i in range(len(Kmatrix)):
  176. for j in range(i, len(Kmatrix)):
  177. Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
  178. Kmatrix[j][i] = Kmatrix[i][j]
  179. return Kmatrix
  180. def gram2distances(Kmatrix):
  181. dmatrix = np.zeros((len(Kmatrix), len(Kmatrix)))
  182. for i1 in range(len(Kmatrix)):
  183. for i2 in range(len(Kmatrix)):
  184. dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2]
  185. dmatrix = np.sqrt(dmatrix)
  186. return dmatrix
  187. # --------------------------- These are tests --------------------------------#
  188. def test_who_is_the_closest_in_kernel_space(Gn):
  189. idx_gi = [0, 6]
  190. g1 = Gn[idx_gi[0]]
  191. g2 = Gn[idx_gi[1]]
  192. # create the "median" graph.
  193. gnew = g2.copy()
  194. gnew.remove_node(0)
  195. nx.draw_networkx(gnew)
  196. plt.show()
  197. print(gnew.nodes(data=True))
  198. Gn = [gnew] + Gn
  199. # compute gram matrix
  200. Kmatrix = compute_kernel(Gn, 'untilhpathkernel', True)
  201. # the distance matrix
  202. dmatrix = gram2distances(Kmatrix)
  203. print(np.sort(dmatrix[idx_gi[0] + 1]))
  204. print(np.argsort(dmatrix[idx_gi[0] + 1]))
  205. print(np.sort(dmatrix[idx_gi[1] + 1]))
  206. print(np.argsort(dmatrix[idx_gi[1] + 1]))
  207. # for all g in Gn, compute (d(g1, g) + d(g2, g)) / 2
  208. dis_median = [(dmatrix[i, idx_gi[0] + 1] + dmatrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
  209. print(np.sort(dis_median))
  210. print(np.argsort(dis_median))
  211. return
  212. def test_who_is_the_closest_in_GED_space(Gn):
  213. from iam import GED
  214. idx_gi = [0, 6]
  215. g1 = Gn[idx_gi[0]]
  216. g2 = Gn[idx_gi[1]]
  217. # create the "median" graph.
  218. gnew = g2.copy()
  219. gnew.remove_node(0)
  220. nx.draw_networkx(gnew)
  221. plt.show()
  222. print(gnew.nodes(data=True))
  223. Gn = [gnew] + Gn
  224. # compute GEDs
  225. ged_matrix = np.zeros((len(Gn), len(Gn)))
  226. for i1 in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
  227. for i2 in range(len(Gn)):
  228. dis, _, _ = GED(Gn[i1], Gn[i2], lib='gedlib')
  229. ged_matrix[i1, i2] = dis
  230. print(np.sort(ged_matrix[idx_gi[0] + 1]))
  231. print(np.argsort(ged_matrix[idx_gi[0] + 1]))
  232. print(np.sort(ged_matrix[idx_gi[1] + 1]))
  233. print(np.argsort(ged_matrix[idx_gi[1] + 1]))
  234. # for all g in Gn, compute (GED(g1, g) + GED(g2, g)) / 2
  235. dis_median = [(ged_matrix[i, idx_gi[0] + 1] + ged_matrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
  236. print(np.sort(dis_median))
  237. print(np.argsort(dis_median))
  238. return
  239. def test_will_IAM_give_the_median_graph_we_wanted(Gn):
  240. idx_gi = [0, 6]
  241. g1 = Gn[idx_gi[0]].copy()
  242. g2 = Gn[idx_gi[1]].copy()
  243. # del Gn[idx_gi[0]]
  244. # del Gn[idx_gi[1] - 1]
  245. g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
  246. # g_median = test_iam_with_more_graphs_as_init(Gn, Gn, c_ei=1, c_er=1, c_es=1)
  247. nx.draw_networkx(g_median)
  248. plt.show()
  249. print(g_median.nodes(data=True))
  250. print(g_median.edges(data=True))
  251. def test_new_IAM_allGraph_deleteNodes(Gn):
  252. idx_gi = [0, 6]
  253. # g1 = Gn[idx_gi[0]].copy()
  254. # g2 = Gn[idx_gi[1]].copy()
  255. g1 = nx.Graph(name='haha')
  256. g1.add_nodes_from([(2, {'atom': 'C'}), (3, {'atom': 'O'}), (4, {'atom': 'C'})])
  257. g1.add_edges_from([(2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
  258. g2 = nx.Graph(name='hahaha')
  259. g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
  260. (3, {'atom': 'O'}), (4, {'atom': 'C'})])
  261. g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
  262. (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
  263. # g2 = g1.copy()
  264. # g2.add_nodes_from([(3, {'atom': 'O'})])
  265. # g2.add_nodes_from([(4, {'atom': 'C'})])
  266. # g2.add_edges_from([(1, 3, {'bond_type': '1'})])
  267. # g2.add_edges_from([(3, 4, {'bond_type': '1'})])
  268. # del Gn[idx_gi[0]]
  269. # del Gn[idx_gi[1] - 1]
  270. g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
  271. # g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(Gn, Gn, c_ei=1, c_er=1, c_es=1)
  272. nx.draw_networkx(g_median)
  273. plt.show()
  274. print(g_median.nodes(data=True))
  275. print(g_median.edges(data=True))
  276. if __name__ == '__main__':
  277. from pygraph.utils.graphfiles import loadDataset
  278. # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
  279. # 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
  280. # ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  281. # 'extra_params': {}} # node nsymb
  282. # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
  283. # 'extra_params': {}}
  284. ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
  285. 'extra_params': {}} # node symb
  286. Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  287. # Gn = Gn[0:20]
  288. test_new_IAM_allGraph_deleteNodes(Gn)
  289. test_will_IAM_give_the_median_graph_we_wanted(Gn)
  290. test_who_is_the_closest_in_GED_space(Gn)
  291. test_who_is_the_closest_in_kernel_space(Gn)
  292. lmbda = 0.03 # termination probalility
  293. r_max = 10 # recursions
  294. l = 500
  295. alpha_range = np.linspace(0.5, 0.5, 1)
  296. k = 20 # k nearest neighbors
  297. # randomly select two molecules
  298. np.random.seed(1)
  299. idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
  300. g1 = Gn[idx_gi[0]]
  301. g2 = Gn[idx_gi[1]]
  302. # g_tmp = iam([g1, g2])
  303. # nx.draw_networkx(g_tmp)
  304. # plt.show()
  305. # compute
  306. # k_list = [] # kernel between each graph and itself.
  307. # k_g1_list = [] # kernel between each graph and g1
  308. # k_g2_list = [] # kernel between each graph and g2
  309. # for ig, g in tqdm(enumerate(Gn), desc='computing self kernels', file=sys.stdout):
  310. # ktemp = compute_kernel([g, g1, g2], 'marginalizedkernel', False)
  311. # k_list.append(ktemp[0][0, 0])
  312. # k_g1_list.append(ktemp[0][0, 1])
  313. # k_g2_list.append(ktemp[0][0, 2])
  314. km = compute_kernel(Gn, 'untilhpathkernel', True)
  315. # k_list = np.diag(km) # kernel between each graph and itself.
  316. # k_g1_list = km[idx_gi[0]] # kernel between each graph and g1
  317. # k_g2_list = km[idx_gi[1]] # kernel between each graph and g2
  318. g_best = []
  319. dis_best = []
  320. # for each alpha
  321. for alpha in alpha_range:
  322. print('alpha =', alpha)
  323. dhat, ghat = gk_iam_nearest(Gn, [alpha, 1 - alpha], idx_gi, km, k, r_max)
  324. dis_best.append(dhat)
  325. g_best.append(ghat)
  326. for idx, item in enumerate(alpha_range):
  327. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  328. print('the corresponding pre-image is')
  329. nx.draw_networkx(g_best[idx])
  330. plt.show()

A Python package for graph kernels, graph edit distances and the graph pre-image problem.