You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

group_results.py 3.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Oct 29 17:26:43 2020
  5. @author: ljia
  6. This script groups results together into a single file for the sake of faster
  7. searching and loading.
  8. """
  9. import os
  10. import pickle
  11. import numpy as np
  12. from shutil import copyfile
  13. from tqdm import tqdm
  14. import sys
  15. # This function is used by other scripts. Modify it carefully.
  16. def group_trials(dir_folder, name_prefix, override, clear, backup):
  17. # Get group name.
  18. label_name = name_prefix.split('.')[0]
  19. if label_name == 'ged_matrix':
  20. group_label = 'ged_mats'
  21. elif label_name == 'runtime':
  22. group_label = 'runtimes'
  23. else:
  24. group_label = label_name
  25. name_suffix = name_prefix[len(label_name):]
  26. if label_name == 'ged_matrix':
  27. name_group = dir_folder + 'groups/' + group_label + name_suffix + 'npy'
  28. else:
  29. name_group = dir_folder + 'groups/' + group_label + name_suffix + 'pkl'
  30. if not override and os.path.isfile(name_group):
  31. # Check if all trial files exist.
  32. trials_complete = True
  33. for trial in range(1, 101):
  34. file_name = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  35. if not os.path.isfile(file_name):
  36. trials_complete = False
  37. break
  38. else:
  39. # Get data.
  40. data_group = []
  41. for trial in range(1, 101):
  42. file_name = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  43. if os.path.isfile(file_name):
  44. with open(file_name, 'rb') as f:
  45. try:
  46. data = pickle.load(f)
  47. except EOFError:
  48. print('EOF Error occurred.')
  49. return
  50. data_group.append(data)
  51. # unpickler = pickle.Unpickler(f)
  52. # data = unpickler.load()
  53. # if not isinstance(data, np.array):
  54. # return
  55. # else:
  56. # data_group.append(data)
  57. else: # Not all trials are completed.
  58. return
  59. # Write groups.
  60. if label_name == 'ged_matrix':
  61. data_group = np.array(data_group)
  62. with open(name_group, 'wb') as f:
  63. np.save(f, data_group)
  64. else:
  65. with open(name_group, 'wb') as f:
  66. pickle.dump(data_group, f)
  67. trials_complete = True
  68. if trials_complete:
  69. # Backup.
  70. if backup:
  71. for trial in range(1, 101):
  72. src = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  73. dst = dir_folder + 'backups/' + name_prefix + 'trial_' + str(trial) + '.pkl'
  74. copyfile(src, dst)
  75. # Clear.
  76. if clear:
  77. for trial in range(1, 101):
  78. src = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  79. os.remove(src)
  80. def group_all_in_folder(dir_folder, override=False, clear=True, backup=True):
  81. # Create folders.
  82. os.makedirs(dir_folder + 'groups/', exist_ok=True)
  83. if backup:
  84. os.makedirs(dir_folder + 'backups', exist_ok=True)
  85. # Iterate all files.
  86. cur_file_prefix = ''
  87. for file in tqdm(sorted(os.listdir(dir_folder)), desc='Grouping', file=sys.stdout):
  88. if os.path.isfile(os.path.join(dir_folder, file)):
  89. name_prefix = file.split('trial_')[0]
  90. # print(name)
  91. # print(name_prefix)
  92. if name_prefix != cur_file_prefix:
  93. group_trials(dir_folder, name_prefix, override, clear, backup)
  94. cur_file_prefix = name_prefix
  95. if __name__ == '__main__':
  96. dir_folder = 'outputs/CRIANN/edit_costs.num_sols.ratios.IPFP/'
  97. group_all_in_folder(dir_folder)
  98. dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.IPFP/'
  99. group_all_in_folder(dir_folder)
  100. dir_folder = 'outputs/CRIANN/edit_costs.max_num_sols.ratios.bipartite/'
  101. group_all_in_folder(dir_folder)
  102. dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.bipartite/'
  103. group_all_in_folder(dir_folder)

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.