You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

Analysis_stability.ratios.real_data.relative_error.py 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Tue Nov 6 15:35:32 2018
  5. @author: ljia
  6. """
  7. #import numpy as np
  8. import matplotlib.pyplot as plt
  9. import numpy as np
  10. import matplotlib.gridspec as gridspec
  11. # import pickle
  12. import os
  13. import sys
  14. from tqdm import tqdm
  15. # from mpl_toolkits.mplot3d import Axes3D
  16. root_dir = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/gklearn/experiments/ged/stability/outputs/'
  17. root_dir_criann = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/gklearn/experiments/ged/stability/outputs/CRIANN/'
  18. Dataset_List = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
  19. Legend_Labels = ['common walk', 'marginalized', 'Sylvester equation', 'conjugate gradient', 'fixed-point iterations', 'Spectral decomposition', 'shortest path', 'structural sp', 'path up to length $h$', 'treelet', 'WL subtree']
  20. # Colors = ['#084594', '#2171b5', '#4292c6', '#6baed6', '#9ecae1', '#c6dbef',
  21. # '#54278f', '#756bb1', '#9e9ac8', '#de2d26', '#fc9272']
  22. Colors=[
  23. '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a',
  24. '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94',
  25. '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d',
  26. '#17becf', '#9edae5']
  27. SMALL_SIZE = 8
  28. MEDIUM_SIZE = 10
  29. BIGGER_SIZE = 12
  30. def read_trials_group(save_dir, ds_name, num_sols, ratio, label):
  31. file_name = save_dir + 'groups/ged_mats.' + ds_name + '.' + label + '_' + str(num_sols) + '.ratio_' + "{:.2f}".format(ratio) + '.npy'
  32. if os.path.isfile(file_name):
  33. with open(file_name, 'rb') as f:
  34. ged_mats = np.load(f)
  35. return ged_mats
  36. else:
  37. return []
  38. # ged_mats = []
  39. # for trial in range(1, 101):
  40. # file_name = file_prefix + '.trial_' + str(trial) + '.pkl'
  41. # if os.path.isfile(file_name):
  42. # ged_matrix = pickle.load(open(file_name, 'rb'))
  43. # ged_mats.append(ged_matrix)
  44. # else:
  45. # # print(trial)
  46. # pass
  47. # Check average relative error along elements in two ged matrices.
  48. def matrices_ave_relative_error(m1, m2):
  49. error = 0
  50. base = 0
  51. for i in range(m1.shape[0]):
  52. for j in range(m1.shape[1]):
  53. error += np.abs(m1[i, j] - m2[i, j])
  54. base += (np.abs(m1[i, j]) + np.abs(m2[i, j])) / 2
  55. return error / base
  56. def compute_relative_error(ged_mats):
  57. if len(ged_mats) != 0:
  58. # get the smallest "correct" GED matrix.
  59. ged_mat_s = np.ones(ged_mats[0].shape) * np.inf
  60. for i in range(ged_mats[0].shape[0]):
  61. for j in range(ged_mats[0].shape[1]):
  62. ged_mat_s[i, j] = np.min([mat[i, j] for mat in ged_mats])
  63. # compute average error.
  64. errors = []
  65. for i, mat in enumerate(ged_mats):
  66. err = matrices_ave_relative_error(mat, ged_mat_s)
  67. # if not per_correct:
  68. # print('matrix # ', str(i))
  69. # pass
  70. errors.append(err)
  71. else:
  72. errors = [0]
  73. return np.mean(errors)
  74. #plt.rc('font', size=SMALL_SIZE) # controls default text sizes
  75. plt.rc('axes', titlesize=15) # fontsize of the axes title
  76. plt.rc('axes', labelsize=15) # fontsize of the x and y labels
  77. plt.rc('xtick', labelsize=15) # fontsize of the tick labels
  78. plt.rc('ytick', labelsize=15) # fontsize of the tick labels
  79. plt.rc('legend', fontsize=15) # legend fontsize
  80. plt.rc('figure', titlesize=15) # fontsize of the figure title
  81. #fig, _ = plt.subplots(2, 2, figsize=(13, 12))
  82. #ax1 = plt.subplot(221)
  83. #ax2 = plt.subplot(222)
  84. #ax3 = plt.subplot(223)
  85. #ax4 = plt.subplot(224)
  86. gs = gridspec.GridSpec(2, 2)
  87. gs.update(hspace=0.3)
  88. fig = plt.figure(figsize=(11, 12))
  89. ax = fig.add_subplot(111) # The big subplot for common labels
  90. ax1 = fig.add_subplot(gs[0, 0], projection='3d')
  91. ax2 = fig.add_subplot(gs[0, 1], projection='3d')
  92. ax3 = fig.add_subplot(gs[1, 0], projection='3d')
  93. ax4 = fig.add_subplot(gs[1, 1], projection='3d')
  94. # ax5 = fig.add_subplot(gs[2, 0])
  95. # ax6 = fig.add_subplot(gs[2, 1])
  96. # Turn off axis lines and ticks of the big subplot
  97. ax.spines['top'].set_color('none')
  98. ax.spines['bottom'].set_color('none')
  99. ax.spines['left'].set_color('none')
  100. ax.spines['right'].set_color('none')
  101. ax.tick_params(labelcolor='w', top='off', bottom='off', left='off', right='off')
  102. ax.xaxis.set_ticks_position('none')
  103. ax.yaxis.set_ticks_position('none')
  104. # Set common labels
  105. #ax.set_xlabel('accuracy(%)')
  106. ax.yaxis.set_label_coords(-0.105, 0.5)
  107. # ax.set_ylabel('runtime($s$)')
  108. # -------------- num_sols, IPFP --------------
  109. def get_num_sol_results():
  110. save_dir = root_dir_criann + 'edit_costs.num_sols.ratios.IPFP/'
  111. errors = {}
  112. print('-------- num_sols, IPFP --------')
  113. for ds_name in Dataset_List:
  114. print(ds_name)
  115. errors[ds_name] = []
  116. for num_sols in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
  117. errors[ds_name].append([])
  118. for ratio in tqdm([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], desc='num_sols = ' + str(num_sols), file=sys.stdout):
  119. ged_mats = read_trials_group(save_dir, ds_name, num_sols, ratio, 'num_sols')
  120. error = compute_relative_error(ged_mats)
  121. errors[ds_name][-1].append(error)
  122. return errors
  123. x_values = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
  124. y_values = range(0, 19)
  125. X, Y = np.meshgrid(x_values, y_values)
  126. errors = get_num_sol_results()
  127. for i, ds_name in enumerate(Dataset_List):
  128. if ds_name in errors:
  129. z_values = np.array(errors[ds_name])
  130. ax1.plot_wireframe(X, Y, z_values.T, label=Dataset_List[i], color=Colors[i]) #, '.-', label=Legend_Labels[i], color=Colors[i])
  131. # ax1.set_yscale('squareroot')
  132. # ax1.grid(axis='y')
  133. ax1.set_xlabel('# of solutions')
  134. ax1.set_ylabel('ratios')
  135. ax1.set_zlabel('average relative errors (%)')
  136. ax1.set_title('(a) num_sols, IPFP')
  137. ax1.set_yticks(range(0, 19, 2))
  138. ax1.set_yticklabels([0.1, 0.3, 0.5, 0.7, 0.9, 2, 4, 6, 8, 10])
  139. # ax1.set_axisbelow(True)
  140. # ax1.spines['top'].set_visible(False)
  141. # ax1.spines['bottom'].set_visible(False)
  142. # ax1.spines['right'].set_visible(False)
  143. # ax1.spines['left'].set_visible(False)
  144. # ax1.xaxis.set_ticks_position('none')
  145. # ax1.yaxis.set_ticks_position('none')
  146. # ax1.set_ylim(bottom=-1000)
  147. handles, labels = ax1.get_legend_handles_labels()
  148. # # -------------- repeats, IPFP --------------
  149. def get_repeats_results():
  150. save_dir = root_dir_criann + 'edit_costs.repeats.ratios.IPFP/'
  151. errors = {}
  152. print('-------- repeats, IPFP --------')
  153. for ds_name in Dataset_List:
  154. print(ds_name)
  155. errors[ds_name] = []
  156. for num_sols in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
  157. errors[ds_name].append([])
  158. for ratio in tqdm([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], desc='num_sols = ' + str(num_sols), file=sys.stdout):
  159. ged_mats = read_trials_group(save_dir, ds_name, num_sols, ratio, 'repeats')
  160. error = compute_relative_error(ged_mats)
  161. errors[ds_name][-1].append(error)
  162. return errors
  163. x_values = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
  164. y_values = range(0, 19)
  165. X, Y = np.meshgrid(x_values, y_values)
  166. errors = get_repeats_results()
  167. for i, ds_name in enumerate(Dataset_List):
  168. if ds_name in errors:
  169. z_values = np.array(errors[ds_name])
  170. ax2.plot_wireframe(X, Y, z_values.T, label=Dataset_List[i], color=Colors[i]) #, '.-', label=Legend_Labels[i], color=Colors[i])
  171. # ax2.set_yscale('squareroot')
  172. # ax2.grid(axis='y')
  173. ax2.set_xlabel('# of solutions')
  174. ax2.set_ylabel('ratios')
  175. ax2.set_zlabel('average relative errors (%)')
  176. ax2.set_title('(b) repeats, IPFP')
  177. ax2.set_yticks(range(0, 19, 2))
  178. ax2.set_yticklabels([0.1, 0.3, 0.5, 0.7, 0.9, 2, 4, 6, 8, 10])
  179. # ax2.set_axisbelow(True)
  180. # ax2.spines['top'].set_visible(False)
  181. # ax2.spines['bottom'].set_visible(False)
  182. # ax2.spines['right'].set_visible(False)
  183. # ax2.spines['left'].set_visible(False)
  184. # ax2.xaxis.set_ticks_position('none')
  185. # ax2.yaxis.set_ticks_position('none')
  186. # ax2.set_ylim(bottom=-1000)
  187. handles, labels = ax2.get_legend_handles_labels()
  188. # # -------------- degrees --------------
  189. # def get_degree_results():
  190. # save_dir = root_dir_criann + '28 cores/synthesized_graphs_degrees/'
  191. # run_times = {}
  192. # for kernel_name in Graph_Kernel_List:
  193. # run_times[kernel_name] = []
  194. # for num in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
  195. # file_name = save_dir + 'run_time.' + kernel_name + '.' + str(num) + '.pkl'
  196. # if os.path.isfile(file_name):
  197. # run_time = pickle.load(open(file_name, 'rb'))
  198. # else:
  199. # run_time = 0
  200. # run_times[kernel_name].append(run_time)
  201. # return run_times
  202. # x_labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  203. # run_times = get_degree_results()
  204. # for i, kernel_name in enumerate(Graph_Kernel_List):
  205. # if kernel_name in run_times:
  206. # ax3.plot(x_labels, run_times[kernel_name], '.-', label=Legend_Labels[i], color=Colors[i])
  207. # ax3.set_yscale('log', nonposy='clip')
  208. # ax3.grid(axis='y')
  209. # ax3.set_xlabel('degrees')
  210. # ax3.set_ylabel('runtime($s$)')
  211. # #ax3.set_ylabel('runtime($s$) per pair of graphs')
  212. # ax3.set_title('(c) degrees')
  213. # ax3.set_axisbelow(True)
  214. # ax3.spines['top'].set_visible(False)
  215. # ax3.spines['bottom'].set_visible(False)
  216. # ax3.spines['right'].set_visible(False)
  217. # ax3.spines['left'].set_visible(False)
  218. # ax3.xaxis.set_ticks_position('none')
  219. # ax3.yaxis.set_ticks_position('none')
  220. # # -------------- Node labels --------------
  221. # def get_node_label_results():
  222. # save_dir = root_dir_criann + '28 cores/synthesized_graphs_num_node_label_alphabet/'
  223. # run_times = {}
  224. # for kernel_name in Graph_Kernel_List_VSym:
  225. # run_times[kernel_name] = []
  226. # for num in [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]:
  227. # file_name = save_dir + 'run_time.' + kernel_name + '.' + str(num) + '.pkl'
  228. # if os.path.isfile(file_name):
  229. # run_time = pickle.load(open(file_name, 'rb'))
  230. # else:
  231. # run_time = 0
  232. # run_times[kernel_name].append(run_time)
  233. # return run_times
  234. # # save_dir = root_dir_criann + 'synthesized_graphs_num_node_label_alphabet/'
  235. # # run_times = pickle.load(open(save_dir + 'run_times.pkl', 'rb'))
  236. # # return run_times
  237. # x_labels = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
  238. # run_times = get_node_label_results()
  239. # for i, kernel_name in enumerate(Graph_Kernel_List):
  240. # if kernel_name in run_times:
  241. # ax4.plot(x_labels[1:], run_times[kernel_name][1:], '.-', label=Legend_Labels[i], color=Colors[i])
  242. # ax4.set_yscale('log', nonposy='clip')
  243. # ax4.grid(axis='y')
  244. # ax4.set_xlabel('# of alphabets')
  245. # ax4.set_ylabel('runtime($s$)')
  246. # #ax4.set_ylabel('runtime($s$) per pair of graphs')
  247. # ax4.set_title('(d) alphabet size of vertex labels')
  248. # ax4.set_axisbelow(True)
  249. # ax4.spines['top'].set_visible(False)
  250. # ax4.spines['bottom'].set_visible(False)
  251. # ax4.spines['right'].set_visible(False)
  252. # ax4.spines['left'].set_visible(False)
  253. # ax4.xaxis.set_ticks_position('none')
  254. # ax4.yaxis.set_ticks_position('none')
  255. from matplotlib.lines import Line2D
  256. custom_lines = []
  257. for color in Colors:
  258. custom_lines.append(Line2D([0], [0], color=color, lw=4))
  259. fig.subplots_adjust(bottom=0.135)
  260. fig.legend(custom_lines, labels, loc='lower center', ncol=4, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6)
  261. plt.savefig('stability.real_data.relative_error.eps', format='eps', dpi=300, transparent=True,
  262. bbox_inches='tight')
  263. plt.show()

A Python package for graph kernels, graph edit distances and the graph pre-image problem.