You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

xp_random_preimage_generation.py 9.6 kB


  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Mon Jun 1 11:37:57 2020
  5. @author: ljia
  6. """
  7. import multiprocessing
  8. import numpy as np
  9. import networkx as nx
  10. import os
  11. from gklearn.utils.graphfiles import saveGXL
  12. from gklearn.preimage import RandomPreimageGenerator
  13. from gklearn.utils import Dataset
  14. dir_root = '../results/xp_random_preimage_generation/'
  15. def xp_random_preimage_generation(kernel_name):
  16. """
  17. Experiment similar to the one in Bakir's paper. A test to check if RandomPreimageGenerator class works correctly.
  18. Returns
  19. -------
  20. None.
  21. """
  22. alpha1_list = np.linspace(0, 1, 11)
  23. k_dis_datasets = []
  24. k_dis_preimages = []
  25. preimages = []
  26. bests_from_dataset = []
  27. for alpha1 in alpha1_list:
  28. print('alpha1 =', alpha1, ':\n')
  29. # set parameters.
  30. ds_name = 'MUTAG'
  31. rpg_options = {'k': 5,
  32. 'r_max': 10, #
  33. 'l': 500,
  34. 'alphas': None,
  35. 'parallel': True,
  36. 'verbose': 2}
  37. if kernel_name == 'PathUpToH':
  38. kernel_options = {'name': 'PathUpToH',
  39. 'depth': 2, #
  40. 'k_func': 'MinMax', #
  41. 'compute_method': 'trie',
  42. 'parallel': 'imap_unordered',
  43. # 'parallel': None,
  44. 'n_jobs': multiprocessing.cpu_count(),
  45. 'normalize': True,
  46. 'verbose': 0}
  47. elif kernel_name == 'Marginalized':
  48. kernel_options = {'name': 'Marginalized',
  49. 'p_quit': 0.8, #
  50. 'n_iteration': 7, #
  51. 'remove_totters': False,
  52. 'parallel': 'imap_unordered',
  53. # 'parallel': None,
  54. 'n_jobs': multiprocessing.cpu_count(),
  55. 'normalize': True,
  56. 'verbose': 0}
  57. edge_required = True
  58. irrelevant_labels = {'edge_labels': ['label_0']}
  59. cut_range = None
  60. # create/get Gram matrix.
  61. dir_save = dir_root + ds_name + '.' + kernel_options['name'] + '/'
  62. if not os.path.exists(dir_save):
  63. os.makedirs(dir_save)
  64. gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
  65. gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
  66. if gmfile_exist:
  67. gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
  68. gram_matrix_unnorm = gmfile['gram_matrix_unnorm']
  69. time_precompute_gm = gmfile['run_time']
  70. # 1. get dataset.
  71. print('1. getting dataset...')
  72. dataset_all = Dataset()
  73. dataset_all.load_predefined_dataset(ds_name)
  74. dataset_all.trim_dataset(edge_required=edge_required)
  75. if irrelevant_labels is not None:
  76. dataset_all.remove_labels(**irrelevant_labels)
  77. if cut_range is not None:
  78. dataset_all.cut_graphs(cut_range)
  79. # # add two "random" graphs.
  80. # g1 = nx.Graph()
  81. # g1.add_nodes_from(range(0, 16), label_0='0')
  82. # g1.add_nodes_from(range(16, 25), label_0='1')
  83. # g1.add_node(25, label_0='2')
  84. # g1.add_nodes_from([26, 27], label_0='3')
  85. # g1.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (5, 0), (4, 9), (12, 3), (10, 13), (13, 14), (14, 15), (15, 8), (0, 16), (1, 17), (2, 18), (12, 19), (11, 20), (13, 21), (15, 22), (7, 23), (6, 24), (14, 25), (25, 26), (25, 27)])
  86. # g2 = nx.Graph()
  87. # g2.add_nodes_from(range(0, 12), label_0='0')
  88. # g2.add_nodes_from(range(12, 19), label_0='1')
  89. # g2.add_nodes_from([19, 20, 21], label_0='2')
  90. # g2.add_nodes_from([22, 23], label_0='3')
  91. # g2.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 19), (19, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 20), (20, 7), (5, 0), (4, 8), (0, 12), (1, 13), (2, 14), (9, 15), (10, 16), (11, 17), (6, 18), (3, 21), (21, 22), (21, 23)])
  92. # dataset_all.load_graphs([g1, g2] + dataset_all.graphs, targets=None)
  93. # 2. initialize rpg and setting parameters.
  94. print('2. initializing rpg and setting parameters...')
  95. # nb_graphs = len(dataset_all.graphs) - 2
  96. # rpg_options['alphas'] = [alpha1, 1 - alpha1] + [0] * nb_graphs
  97. nb_graphs = len(dataset_all.graphs)
  98. alphas = [0] * nb_graphs
  99. alphas[1] = alpha1
  100. alphas[6] = 1 - alpha1
  101. rpg_options['alphas'] = alphas
  102. if gmfile_exist:
  103. rpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm
  104. rpg_options['runtime_precompute_gm'] = time_precompute_gm
  105. rpg = RandomPreimageGenerator()
  106. rpg.dataset = dataset_all
  107. rpg.set_options(**rpg_options.copy())
  108. rpg.kernel_options = kernel_options.copy()
  109. # 3. compute preimage.
  110. print('3. computing preimage...')
  111. rpg.run()
  112. results = rpg.get_results()
  113. k_dis_datasets.append(results['k_dis_dataset'])
  114. k_dis_preimages.append(results['k_dis_preimage'])
  115. bests_from_dataset.append(rpg.best_from_dataset)
  116. preimages.append(rpg.preimage)
  117. # 4. save results.
  118. # write Gram matrices to file.
  119. if not gmfile_exist:
  120. np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=rpg.gram_matrix_unnorm, run_time=results['runtime_precompute_gm'])
  121. # save graphs.
  122. fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3]
  123. saveGXL(rpg.best_from_dataset, fn_best_dataset + '.gxl', method='default',
  124. node_labels=dataset_all.node_labels, edge_labels=dataset_all.edge_labels,
  125. node_attrs=dataset_all.node_attrs, edge_attrs=dataset_all.edge_attrs)
  126. fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3]
  127. saveGXL(rpg.preimage, fn_preimage + '.gxl', method='default',
  128. node_labels=dataset_all.node_labels, edge_labels=dataset_all.edge_labels,
  129. node_attrs=dataset_all.node_attrs, edge_attrs=dataset_all.edge_attrs)
  130. # draw graphs.
  131. __draw_graph(rpg.best_from_dataset, fn_best_dataset)
  132. __draw_graph(rpg.preimage, fn_preimage)
  133. # save distances.
  134. np.savez(dir_save + 'distances.' + ds_name + '.' + kernel_options['name'], k_dis_datasets=k_dis_datasets, k_dis_preimages=k_dis_preimages)
  135. # plot results figure.
  136. __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save)
  137. print('\ncomplete.\n')
  138. return k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages
  139. def __draw_graph(graph, file_prefix):
  140. # import matplotlib
  141. # matplotlib.use('agg')
  142. import matplotlib.pyplot as plt
  143. plt.figure()
  144. pos = nx.spring_layout(graph)
  145. nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'label_0'), font_color='w', width=3, with_labels=True)
  146. plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
  147. # plt.show()
  148. plt.clf()
  149. plt.close()
  150. def __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save):
  151. import matplotlib.pyplot as plt
  152. fig, ax = plt.subplots(1, 1, figsize=(7, 4.5))
  153. ind = np.arange(len(alpha1_list)) # the x locations for the groups
  154. width = 0.35 # the width of the bars: can also be len(x) sequence
  155. ax.bar(ind, k_dis_preimages, width, label='Reconstructed pre-image', zorder=3, color='#133AAC')
  156. ax.set_xlabel(r'$\alpha \in [0,1]$')
  157. ax.set_ylabel(r'$d(g_i,g^\star(\alpha))$')
  158. #ax.set_title('Runtime of the shortest path kernel on all datasets')
  159. plt.xticks(ind, [str(i)[0:3] for i in alpha1_list])
  160. #ax.set_yticks(np.logspace(-16, -3, num=20, base=10))
  161. #ax.set_ylim(bottom=1e-15)
  162. ax.grid(axis='y', zorder=0)
  163. ax.spines['top'].set_visible(False)
  164. ax.spines['bottom'].set_visible(False)
  165. ax.spines['left'].set_visible(False)
  166. ax.spines['right'].set_visible(False)
  167. ax.xaxis.set_ticks_position('none')
  168. ax.plot(ind, k_dis_datasets, 'b.-', label=r'Nearest neighbor in $D_N$', color='orange', zorder=4)
  169. ax.yaxis.set_ticks_position('none')
  170. fig.subplots_adjust(bottom=.2)
  171. fig.legend(loc='lower center', ncol=2, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6)
  172. plt.savefig(dir_save + 'distances in kernel space.eps', format='eps', dpi=300,
  173. transparent=True, bbox_inches='tight')
  174. plt.show()
  175. plt.clf()
  176. plt.close()
  177. if __name__ == '__main__':
  178. # kernel_name = 'PathUpToH'
  179. kernel_name = 'Marginalized'
  180. k_dis_datasets, k_dis_preimages, bests_from_dataset, preimages = xp_random_preimage_generation(kernel_name)
  181. # # save graphs.
  182. # dir_save = dir_root + 'MUTAG.PathUpToH/'
  183. # for i, alpha1 in enumerate(np.linspace(0, 1, 11)):
  184. # fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3]
  185. # saveGXL(bests_from_dataset[i], fn_best_dataset + '.gxl', method='default',
  186. # node_labels=['label_0'], edge_labels=[],
  187. # node_attrs=[], edge_attrs=[])
  188. # fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3]
  189. # saveGXL(preimages[i], fn_preimage + '.gxl', method='default',
  190. # node_labels=['label_0'], edge_labels=[],
  191. # node_attrs=[], edge_attrs=[])
  192. # # draw graphs.
  193. # dir_save = dir_root + 'MUTAG.PathUpToH/'
  194. # for i, alpha1 in enumerate(np.linspace(0, 1, 11)):
  195. # fn_best_dataset = dir_save + 'g_best_dataset.' + 'alpha1_' + str(alpha1)[0:3]
  196. # __draw_graph(bests_from_dataset[i], fn_best_dataset)
  197. # fn_preimage = dir_save + 'g_preimage.' + 'alpha1_' + str(alpha1)[0:3]
  198. # __draw_graph(preimages[i], fn_preimage)
  199. # # plot results figure.
  200. # alpha1_list = np.linspace(0, 1, 11)
  201. # dir_save = dir_root + 'MUTAG.PathUpToH/'
  202. # __plot_results(alpha1_list, k_dis_datasets, k_dis_preimages, dir_save)
  203. # k_dis_datasets = [0.0,
  204. # 0.08882515554098754,
  205. # 0.17765031108197632,
  206. # 0.2664754666229643,
  207. # 0.35530062216395264,
  208. # 0.44412577770494066,
  209. # 0.35530062216395236,
  210. # 0.2664754666229643,
  211. # 0.17765031108197632,
  212. # 0.08882515554098878,
  213. # 0.0]
  214. # k_dis_preimages = [0.0,
  215. # 0.08882515554098754,
  216. # 0.17765031108197632,
  217. # 0.2664754666229643,
  218. # 0.35530062216395264,
  219. # 0.44412577770494066,
  220. # 0.35530062216395236,
  221. # 0.2664754666229643,
  222. # 0.17765031108197632,
  223. # 0.08882515554098878,
  224. # 0.0]

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.