You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

group_results.py 3.0 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Oct 29 17:26:43 2020
  5. @author: ljia
  6. This script groups results together into a single file for the sake of faster
  7. searching and loading.
  8. """
  9. import os
  10. import pickle
  11. import numpy as np
  12. from shutil import copyfile
  13. from tqdm import tqdm
  14. import sys
  15. def group_trials(dir_folder, name_prefix, override, clear, backup):
  16. # Get group name.
  17. label_name = name_prefix.split('.')[0]
  18. if label_name == 'ged_matrix':
  19. group_label = 'ged_mats'
  20. elif label_name == 'runtime':
  21. group_label = 'runtimes'
  22. else:
  23. group_label = label_name
  24. name_suffix = name_prefix[len(label_name):]
  25. if label_name == 'ged_matrix':
  26. name_group = dir_folder + 'groups/' + group_label + name_suffix + 'npy'
  27. else:
  28. name_group = dir_folder + 'groups/' + group_label + name_suffix + 'pkl'
  29. if not override and os.path.isfile(name_group):
  30. # Check if all trial files exist.
  31. trials_complete = True
  32. for trial in range(1, 101):
  33. file_name = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  34. if not os.path.isfile(file_name):
  35. trials_complete = False
  36. break
  37. else:
  38. # Get data.
  39. data_group = []
  40. for trial in range(1, 101):
  41. file_name = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  42. if os.path.isfile(file_name):
  43. with open(file_name, 'rb') as f:
  44. data = pickle.load(f)
  45. data_group.append(data)
  46. else: # Not all trials are completed.
  47. return
  48. # Write groups.
  49. if label_name == 'ged_matrix':
  50. data_group = np.array(data_group)
  51. with open(name_group, 'wb') as f:
  52. np.save(f, data_group)
  53. else:
  54. with open(name_group, 'wb') as f:
  55. pickle.dump(data_group, f)
  56. trials_complete = True
  57. if trials_complete:
  58. # Backup.
  59. if backup:
  60. for trial in range(1, 101):
  61. src = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  62. dst = dir_folder + 'backups/' + name_prefix + 'trial_' + str(trial) + '.pkl'
  63. copyfile(src, dst)
  64. # Clear.
  65. if clear:
  66. for trial in range(1, 101):
  67. src = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  68. os.remove(src)
  69. def group_all_in_folder(dir_folder, override=False, clear=True, backup=True):
  70. # Create folders.
  71. if not os.path.exists(dir_folder + 'groups/'):
  72. os.makedirs(dir_folder + 'groups/')
  73. if backup:
  74. if not os.path.exists(dir_folder + 'backups'):
  75. os.makedirs(dir_folder + 'backups')
  76. # Iterate all files.
  77. cur_file_prefix = ''
  78. for file in tqdm(sorted(os.listdir(dir_folder)), desc='Grouping', file=sys.stdout):
  79. if os.path.isfile(os.path.join(dir_folder, file)):
  80. name_prefix = file.split('trial_')[0]
  81. # print(name)
  82. # print(name_prefix)
  83. if name_prefix != cur_file_prefix:
  84. group_trials(dir_folder, name_prefix, override, clear, backup)
  85. cur_file_prefix = name_prefix
  86. if __name__ == '__main__':
  87. dir_folder = 'outputs/CRIANN/edit_costs.num_sols.ratios.IPFP/'
  88. group_all_in_folder(dir_folder)
  89. dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.IPFP/'
  90. group_all_in_folder(dir_folder)

A Python package for graph kernels, graph edit distances and graph pre-image problem.