You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

preimage_results_to_latex_tables.py 8.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Apr 30 10:16:33 2020
  5. @author: ljia
  6. """
  7. import pandas as pd
  8. import numpy as np
  9. import os
  10. DS_SYMB = ['MUTAG', 'Monoterpenoides', 'MAO_symb']
  11. DS_NON_SYMB = ['Letter-high', 'Letter-med', 'Letter-low', 'COIL-RAG', 'PAH']
  12. DS_UNLABELED = ['PAH_unlabeled']
  13. def rounder(x, decimals):
  14. x_strs = str(x).split('.')
  15. if len(x_strs) == 2:
  16. before = x_strs[0]
  17. after = x_strs[1]
  18. if len(after) > decimals:
  19. if int(after[decimals]) >= 5:
  20. after0s = ''
  21. for c in after:
  22. if c == '0':
  23. after0s += '0'
  24. elif c != '0':
  25. break
  26. after = after0s + str(int(after[0:decimals]) + 1)[-decimals:]
  27. else:
  28. after = after[0:decimals]
  29. elif len(after) < decimals:
  30. after += '0' * (decimals - len(after))
  31. return before + '.' + after
  32. elif len(x_strs) == 1:
  33. return x_strs[0]
  34. def replace_nth(string, sub, wanted, n):
  35. import re
  36. where = [m.start() for m in re.finditer(sub, string)][n-1]
  37. before = string[:where]
  38. after = string[where:]
  39. after = after.replace(sub, wanted, 1)
  40. newString = before + after
  41. return newString
  42. def df_to_latex_table(df):
  43. ltx = df.to_latex(index=True, escape=False, multirow=True)
  44. # modify middle lines.
  45. ltx = ltx.replace('\\cline{1-9}\n\\cline{2-9}', '\\toprule')
  46. ltx = ltx.replace('\\cline{2-9}', '\\cmidrule(l){2-9}')
  47. # modify first row.
  48. i_start = ltx.find('\n\\toprule\n')
  49. i_end = ltx.find('\\\\\n\\midrule\n')
  50. ltx = ltx.replace(ltx[i_start:i_end+12], '\n\\toprule\nDatasets & Graph Kernels & Algorithms & $d_\\mathcal{F}$ SM & $d_\\mathcal{F}$ SM (UO) & $d_\\mathcal{F}$ GM & $d_\\mathcal{F}$ GM (UO) & Runtime & Runtime (UO) \\\\\n\\midrule\n', 1)
  51. # add row numbers.
  52. ltx = ltx.replace('lllllllll', 'lllllllll|@{\\makebox[2em][r]{\\textit{\\rownumber\\space}}}', 1)
  53. ltx = replace_nth(ltx, '\\\\\n', '\\gdef\\rownumber{\\stepcounter{magicrownumbers}\\arabic{magicrownumbers}} \\\\\n', 1)
  54. return ltx
  55. def beautify_df(df):
  56. df = df.sort_values(by=['Datasets', 'Graph Kernels'])
  57. df = df.set_index(['Datasets', 'Graph Kernels', 'Algorithms'])
  58. # index = pd.MultiIndex.from_frame(df[['Datasets', 'Graph Kernels', 'Algorithms']])
  59. # bold the best results.
  60. for ds in df.index.get_level_values('Datasets').unique():
  61. for gk in df.loc[ds].index.get_level_values('Graph Kernels').unique():
  62. min_val = np.inf
  63. min_indices = []
  64. min_labels = []
  65. for index, row in df.loc[(ds, gk)].iterrows():
  66. for label in ['$d_\mathcal{F}$ SM', '$d_\mathcal{F}$ GM', '$d_\mathcal{F}$ GM (UO)']:
  67. value = row[label]
  68. if value != '-':
  69. value = float(value.strip('/same'))
  70. if value < min_val:
  71. min_val = value
  72. min_indices = [index]
  73. min_labels = [label]
  74. elif value == min_val:
  75. min_indices.append(index)
  76. min_labels.append(label)
  77. for idx, index in enumerate(min_indices):
  78. df.loc[(ds, gk, index), min_labels[idx]] = '\\textbf{' + df.loc[(ds, gk, index), min_labels[idx]] + '}'
  79. return df
  80. def get_results(data_dir, ds_name, gkernel):
  81. # get results from .csv.
  82. file_name = data_dir + 'results_summary.' + ds_name + '.' + gkernel + '.csv'
  83. try:
  84. df_summary = pd.read_csv(file_name)
  85. except FileNotFoundError:
  86. return None
  87. df_results = pd.DataFrame(index=None, columns=['d_F SM', 'd_F GM', 'runtime'])
  88. for index, row in df_summary.iterrows():
  89. if row['target'] == 'all' and row['fit method'] == 'k-graphs':
  90. df_results.loc['From median set'] = ['-', rounder(row['min dis_k gi'], 3), '-']
  91. if_uo = (int(row['mge num decrease order']) > 0 or int(row['mge num increase order']) > 0)
  92. df_results.loc['Optimized'] = [rounder(row['dis_k SM'], 3),
  93. rounder(row['dis_k GM'], 3) if if_uo else (rounder(row['dis_k GM'], 3) + '/same'),
  94. rounder(row['time total'], 2)]
  95. if row['target'] == 'all' and row['fit method'] == 'expert':
  96. if_uo = (int(row['mge num decrease order']) > 0 or int(row['mge num increase order']) > 0)
  97. df_results.loc['IAM: expert costs'] = [rounder(row['dis_k SM'], 3),
  98. rounder(row['dis_k GM'], 3) if if_uo else (rounder(row['dis_k GM'], 3) + '/same'),
  99. rounder(row['time total'], 2)]
  100. # get results from random summary .csv.
  101. random_fini = True
  102. file_name = data_dir + 'summary_for_random_edit_costs.csv'
  103. try:
  104. df_random = pd.read_csv(file_name)
  105. except FileNotFoundError:
  106. random_fini = False
  107. if random_fini:
  108. for index, row in df_random.iterrows():
  109. if row['measure'] == 'mean':
  110. if_uo = (float(row['mge num decrease order']) > 0 or float(row['mge num increase order']) > 0)
  111. df_results.loc['IAM: random costs'] = [rounder(row['dis_k SM'], 3),
  112. rounder(row['dis_k GM'], 3) if if_uo else (rounder(row['dis_k GM'], 3) + '/same'),
  113. rounder(row['time total'], 2)]
  114. # sort index.
  115. df_results = df_results.reindex([item for item in ['From median set', 'IAM: random costs', 'IAM: expert costs', 'Optimized'] if item in df_results.index])
  116. return df_results
  117. def get_results_of_one_xp(data_dir, ds_name, gkernel):
  118. df_results = pd.DataFrame()
  119. df_tmp_uo = None
  120. if not os.path.isfile(data_dir + 'update_order/error.txt'):
  121. df_tmp_uo = get_results(data_dir + 'update_order/', ds_name, gkernel)
  122. df_tmp = None
  123. if not os.path.isfile(data_dir + 'error.txt'):
  124. df_tmp = get_results(data_dir, ds_name, gkernel)
  125. if (df_tmp_uo is not None and not df_tmp_uo.empty) or (df_tmp is not None and not df_tmp.empty):
  126. df_results = pd.DataFrame(index=['From median set', 'IAM: random costs', 'IAM: expert costs', 'Optimized'], columns=['$d_\mathcal{F}$ SM', '$d_\mathcal{F}$ SM (UO)', '$d_\mathcal{F}$ GM', '$d_\mathcal{F}$ GM (UO)', 'Runtime', 'Runtime (UO)'])
  127. if df_tmp_uo is not None and not df_tmp_uo.empty:
  128. for index, row in df_tmp_uo.iterrows():
  129. for algo in df_results.index:
  130. if index == algo:
  131. df_results.at[algo, '$d_\mathcal{F}$ SM (UO)'] = row['d_F SM']
  132. df_results.at[algo, '$d_\mathcal{F}$ GM (UO)'] = row['d_F GM']
  133. df_results.at[algo, 'Runtime (UO)'] = row['runtime']
  134. if df_tmp is not None and not df_tmp.empty:
  135. for index, row in df_tmp.iterrows():
  136. for algo in df_results.index:
  137. if index == algo:
  138. df_results.at[algo, '$d_\mathcal{F}$ SM'] = row['d_F SM']
  139. df_results.at[algo, '$d_\mathcal{F}$ GM'] = row['d_F GM'].strip('/same')
  140. df_results.at[algo, 'Runtime'] = row['runtime']
  141. df_results = df_results.dropna(axis=0, how='all')
  142. df_results = df_results.fillna(value='-')
  143. df_results = df_results.reset_index().rename(columns={'index': 'Algorithms'})
  144. return df_results
  145. def get_results_for_all_experiments(root_dir):
  146. columns=['Datasets', 'Graph Kernels', 'Algorithms', '$d_\mathcal{F}$ SM', '$d_\mathcal{F}$ SM (UO)', '$d_\mathcal{F}$ GM', '$d_\mathcal{F}$ GM (UO)', 'Runtime', 'Runtime (UO)']
  147. df_symb = pd.DataFrame(columns=columns)
  148. df_nonsymb = pd.DataFrame(columns=columns)
  149. df_unlabeled = pd.DataFrame(columns=columns)
  150. dir_list = [i for i in os.listdir(root_dir) if os.path.isdir(root_dir + i)]
  151. for dir_name in dir_list:
  152. sp_tmp = dir_name.split('.')
  153. gkernel = sp_tmp[1]
  154. ds_name = sp_tmp[0].strip('[error]')
  155. suffix = ''
  156. if sp_tmp[-1] == 'unlabeled':
  157. suffix = '_unlabeled'
  158. elif sp_tmp[-1] == 'symb':
  159. suffix = '_symb'
  160. df_results = get_results_of_one_xp(root_dir + dir_name + '/', ds_name, gkernel)
  161. if not df_results.empty:
  162. ds_name += suffix
  163. if ds_name in DS_SYMB:
  164. for index, row in df_results.iterrows():
  165. df_symb.loc[len(df_symb)] = [ds_name.replace('_', '\_'), gkernel] + row.tolist()
  166. elif ds_name in DS_NON_SYMB:
  167. for index, row in df_results.iterrows():
  168. df_nonsymb.loc[len(df_nonsymb)] = [ds_name.replace('_', '\_'), gkernel] + row.tolist()
  169. elif ds_name in DS_UNLABELED:
  170. for index, row in df_results.iterrows():
  171. df_unlabeled.loc[len(df_unlabeled)] = [ds_name.replace('_', '\_'), gkernel] + row.tolist()
  172. else:
  173. raise Exception('dataset' + ds_name + 'is not pre-defined.')
  174. # sort.
  175. df_symb = beautify_df(df_symb)
  176. df_nonsymb = beautify_df(df_nonsymb)
  177. df_unlabeled = beautify_df(df_unlabeled)
  178. # convert dfs to latex strings.
  179. ltx_symb = df_to_latex_table(df_symb)
  180. ltx_nonsymb = df_to_latex_table(df_nonsymb)
  181. ltx_unlabeled = df_to_latex_table(df_unlabeled)
  182. return ltx_symb, ltx_nonsymb, ltx_unlabeled
if __name__ == '__main__':
    # Script entry point: build the three LaTeX tables for one results
    # directory. The strings are only bound to module-level names here;
    # nothing is printed or written to disk.
    # Alternative results directory, kept for reference:
    # root_dir = '../results/xp_median_preimage.init20/'
    root_dir = '../../results/CRIANN/xp_median_preimage.init10/'
    ltx_symb, ltx_nonsymb, ltx_unlabeled = get_results_for_all_experiments(root_dir)

A Python package for graph kernels, graph edit distances and graph pre-image problem.