You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

preimage_random.py 14 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Mar 6 16:03:11 2019
  5. pre-image
  6. @author: ljia
  7. """
  8. import sys
  9. import numpy as np
  10. import random
  11. from tqdm import tqdm
  12. import networkx as nx
  13. import matplotlib.pyplot as plt
  14. sys.path.insert(0, "../")
  15. from utils import compute_kernel, dis_gstar
  16. def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):
  17. Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
  18. # compute k nearest neighbors of phi in DN.
  19. dis_list = [] # distance between g_star and each graph.
  20. term3 = 0
  21. for i1, a1 in enumerate(alpha):
  22. for i2, a2 in enumerate(alpha):
  23. term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
  24. for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
  25. dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
  26. dis_list.append(dtemp)
  27. # print(np.max(dis_list))
  28. # print(np.min(dis_list))
  29. # print(np.min([item for item in dis_list if item != 0]))
  30. # print(np.mean(dis_list))
  31. # sort
  32. sort_idx = np.argsort(dis_list)
  33. dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
  34. nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
  35. g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
  36. if dis_gs[0] == 0: # the exact pre-image.
  37. print('The exact pre-image is found from the input dataset.')
  38. return 0, g0hat_list[0], 0
  39. dhat = dis_gs[0] # the nearest distance
  40. # ghat_list = [g.copy() for g in g0hat_list]
  41. # for g in ghat_list:
  42. # draw_Letter_graph(g)
  43. # nx.draw_networkx(g)
  44. # plt.show()
  45. # print(g.nodes(data=True))
  46. # print(g.edges(data=True))
  47. Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
  48. # for gi in Gk:
  49. ## nx.draw_networkx(gi)
  50. ## plt.show()
  51. # draw_Letter_graph(g)
  52. # print(gi.nodes(data=True))
  53. # print(gi.edges(data=True))
  54. Gs_nearest = [g.copy() for g in Gk]
  55. gihat_list = []
  56. dihat_list = []
  57. # i = 1
  58. r = 0
  59. # sod_list = [dhat]
  60. # found = False
  61. dis_of_each_itr = [dhat]
  62. nb_updated = 0
  63. g_best = []
  64. while r < r_max:
  65. print('\nr =', r)
  66. print('itr for gk =', nb_updated, '\n')
  67. found = False
  68. dis_bests = dis_gs + dihat_list
  69. # @todo what if the log is negetive? how to choose alpha (scalar)?
  70. fdgs_list = np.array(dis_bests)
  71. if np.min(fdgs_list) < 1:
  72. fdgs_list /= np.min(dis_bests)
  73. fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
  74. if np.min(fdgs_list) < 1:
  75. fdgs_list = np.array(fdgs_list) + 1
  76. for ig, gs in enumerate(Gs_nearest + gihat_list):
  77. # nx.draw_networkx(gs)
  78. # plt.show()
  79. for trail in range(0, l):
  80. # for trail in tqdm(range(0, l), desc='l loops', file=sys.stdout):
  81. # add and delete edges.
  82. gtemp = gs.copy()
  83. np.random.seed()
  84. # which edges to change.
  85. # @todo: should we use just half of the adjacency matrix for undirected graphs?
  86. nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
  87. # @todo: what if fdgs is bigger than nb_vpairs?
  88. idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if
  89. fdgs_list[ig] < nb_vpairs else nb_vpairs)
  90. # idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
  91. # (nx.number_of_nodes(gs) - 1), fdgs)
  92. for item in idx_change:
  93. node1 = int(item / (nx.number_of_nodes(gs) - 1))
  94. node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
  95. if node2 >= node1: # skip the self pair.
  96. node2 += 1
  97. # @todo: is the randomness correct?
  98. if not gtemp.has_edge(node1, node2):
  99. gtemp.add_edge(node1, node2)
  100. # nx.draw_networkx(gs)
  101. # plt.show()
  102. # nx.draw_networkx(gtemp)
  103. # plt.show()
  104. else:
  105. gtemp.remove_edge(node1, node2)
  106. # nx.draw_networkx(gs)
  107. # plt.show()
  108. # nx.draw_networkx(gtemp)
  109. # plt.show()
  110. # nx.draw_networkx(gtemp)
  111. # plt.show()
  112. # compute distance between \psi and the new generated graph.
  113. # knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
  114. # p_quit=lmbda, n_iteration=20, remove_totters=False,
  115. # n_jobs=multiprocessing.cpu_count(), verbose=False)
  116. knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
  117. dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew,
  118. withterm3=False)
  119. if dnew <= dhat: # @todo: the new distance is smaller or also equal?
  120. if dnew < dhat:
  121. print('\nI am smaller!')
  122. print('ig =', str(ig), ', l =', str(trail))
  123. print(dhat, '->', dnew)
  124. nb_updated += 1
  125. elif dnew == dhat:
  126. print('I am equal!')
  127. # nx.draw_networkx(gtemp)
  128. # plt.show()
  129. # print(gtemp.nodes(data=True))
  130. # print(gtemp.edges(data=True))
  131. dhat = dnew
  132. gnew = gtemp.copy()
  133. found = True # found better graph.
  134. if found:
  135. r = 0
  136. gihat_list = [gnew]
  137. dihat_list = [dhat]
  138. else:
  139. r += 1
  140. dis_of_each_itr.append(dhat)
  141. print('the shortest distances for previous iterations are', dis_of_each_itr)
  142. # dis_best.append(dhat)
  143. g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
  144. print('distances in kernel space:', dis_of_each_itr, '\n')
  145. return dhat, g_best, nb_updated
  146. # return 0, 0, 0
  147. if __name__ == '__main__':
  148. from pygraph.utils.graphfiles import loadDataset
  149. # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
  150. # 'extra_params': {}} # node/edge symb
  151. ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
  152. 'extra_params': {}} # node nsymb
  153. # ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
  154. # 'extra_params': {}}
  155. # ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
  156. # 'extra_params': {}} # node symb
  157. DN, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
  158. #DN = DN[0:10]
  159. lmbda = 0.03 # termination probalility
  160. r_max = 3 # 10 # iteration limit.
  161. l = 500
  162. alpha_range = np.linspace(0.5, 0.5, 1)
  163. #alpha_range = np.linspace(0.1, 0.9, 9)
  164. k = 10 # 5 # k nearest neighbors
  165. # randomly select two molecules
  166. #np.random.seed(1)
  167. #idx1, idx2 = np.random.randint(0, len(DN), 2)
  168. #g1 = DN[idx1]
  169. #g2 = DN[idx2]
  170. idx1 = 0
  171. idx2 = 6
  172. g1 = DN[idx1]
  173. g2 = DN[idx2]
  174. # compute
  175. k_list = [] # kernel between each graph and itself.
  176. k_g1_list = [] # kernel between each graph and g1
  177. k_g2_list = [] # kernel between each graph and g2
  178. for ig, g in tqdm(enumerate(DN), desc='computing self kernels', file=sys.stdout):
  179. # ktemp = marginalizedkernel([g, g1, g2], node_label='atom', edge_label=None,
  180. # p_quit=lmbda, n_iteration=20, remove_totters=False,
  181. # n_jobs=multiprocessing.cpu_count(), verbose=False)
  182. ktemp = compute_kernel([g, g1, g2], 'untilhpathkernel', verbose=False)
  183. k_list.append(ktemp[0, 0])
  184. k_g1_list.append(ktemp[0, 1])
  185. k_g2_list.append(ktemp[0, 2])
  186. g_best = []
  187. dis_best = []
  188. # for each alpha
  189. for alpha in alpha_range:
  190. print('alpha =', alpha)
  191. # compute k nearest neighbors of phi in DN.
  192. dis_list = [] # distance between g_star and each graph.
  193. for ig, g in tqdm(enumerate(DN), desc='computing distances', file=sys.stdout):
  194. dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
  195. k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
  196. (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  197. k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
  198. dis_list.append(np.sqrt(dtemp))
  199. # sort
  200. sort_idx = np.argsort(dis_list)
  201. dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
  202. g0hat = DN[sort_idx[0]] # the nearest neighbor of phi in DN
  203. if dis_gs[0] == 0: # the exact pre-image.
  204. print('The exact pre-image is found from the input dataset.')
  205. g_pimg = g0hat
  206. break
  207. dhat = dis_gs[0] # the nearest distance
  208. Dk = [DN[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
  209. gihat_list = []
  210. i = 1
  211. r = 1
  212. while r < r_max:
  213. print('r =', r)
  214. found = False
  215. for ig, gs in enumerate(Dk + gihat_list):
  216. # nx.draw_networkx(gs)
  217. # plt.show()
  218. # @todo what if the log is negetive?
  219. fdgs = int(np.abs(np.ceil(np.log(alpha * dis_gs[ig]))))
  220. for trail in tqdm(range(0, l), desc='l loop', file=sys.stdout):
  221. # add and delete edges.
  222. gtemp = gs.copy()
  223. np.random.seed()
  224. # which edges to change.
  225. # @todo: should we use just half of the adjacency matrix for undirected graphs?
  226. nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
  227. # @todo: what if fdgs is bigger than nb_vpairs?
  228. idx_change = random.sample(range(nb_vpairs), fdgs if fdgs < nb_vpairs else nb_vpairs)
  229. # idx_change = np.random.randint(0, nx.number_of_nodes(gs) *
  230. # (nx.number_of_nodes(gs) - 1), fdgs)
  231. for item in idx_change:
  232. node1 = int(item / (nx.number_of_nodes(gs) - 1))
  233. node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
  234. if node2 >= node1: # skip the self pair.
  235. node2 += 1
  236. # @todo: is the randomness correct?
  237. if not gtemp.has_edge(node1, node2):
  238. # @todo: how to update the bond_type? 0 or 1?
  239. gtemp.add_edges_from([(node1, node2, {'bond_type': 1})])
  240. # nx.draw_networkx(gs)
  241. # plt.show()
  242. # nx.draw_networkx(gtemp)
  243. # plt.show()
  244. else:
  245. gtemp.remove_edge(node1, node2)
  246. # nx.draw_networkx(gs)
  247. # plt.show()
  248. # nx.draw_networkx(gtemp)
  249. # plt.show()
  250. # nx.draw_networkx(gtemp)
  251. # plt.show()
  252. # compute distance between phi and the new generated graph.
  253. # knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
  254. # p_quit=lmbda, n_iteration=20, remove_totters=False,
  255. # n_jobs=multiprocessing.cpu_count(), verbose=False)
  256. knew = compute_kernel([gtemp, g1, g2], 'untilhpathkernel', verbose=False)
  257. dnew = np.sqrt(knew[0, 0] - 2 * (alpha * knew[0, 1] + (1 - alpha) *
  258. knew[0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
  259. (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
  260. k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]))
  261. if dnew < dhat: # @todo: the new distance is smaller or also equal?
  262. print('I am smaller!')
  263. print(dhat, '->', dnew)
  264. nx.draw_networkx(gtemp)
  265. plt.show()
  266. print(gtemp.nodes(data=True))
  267. print(gtemp.edges(data=True))
  268. dhat = dnew
  269. gnew = gtemp.copy()
  270. found = True # found better graph.
  271. r = 0
  272. elif dnew == dhat:
  273. print('I am equal!')
  274. if found:
  275. gihat_list = [gnew]
  276. dis_gs.append(dhat)
  277. else:
  278. r += 1
  279. dis_best.append(dhat)
  280. g_best += ([g0hat] if len(gihat_list) == 0 else gihat_list)
  281. for idx, item in enumerate(alpha_range):
  282. print('when alpha is', item, 'the shortest distance is', dis_best[idx])
  283. print('the corresponding pre-image is')
  284. nx.draw_networkx(g_best[idx])
  285. plt.show()

A Python package for graph kernels, graph edit distances and graph pre-image problem.