You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

group_results.py 4.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Thu Oct 29 17:26:43 2020
  5. @author: ljia
  6. This script groups results together into a single file for the sake of faster
  7. searching and loading.
  8. """
  9. import os
  10. import pickle
  11. import numpy as np
  12. from shutil import copyfile
  13. from tqdm import tqdm
  14. import sys
  15. def check_group_existence(file_name):
  16. path, name = os.path.split(file_name)
  17. marker_fn = os.path.join(path, 'group_names_finished.pkl')
  18. if os.path.isfile(marker_fn):
  19. with open(marker_fn, 'rb') as f:
  20. fns = pickle.load(f)
  21. if name in fns:
  22. return True
  23. if os.path.isfile(file_name):
  24. return True
  25. return False
  26. def update_group_marker(file_name):
  27. path, name = os.path.split(file_name)
  28. marker_fn = os.path.join(path, 'group_names_finished.pkl')
  29. if os.path.isfile(marker_fn):
  30. with open(marker_fn, 'rb') as f:
  31. fns = pickle.loads(f)
  32. if name in fns:
  33. return
  34. else:
  35. fns.add(name)
  36. else:
  37. fns = set({name})
  38. with open(marker_fn, 'wb') as f:
  39. pickle.dump(fns, f)
  40. def create_group_marker_file(dir_folder, overwrite=True):
  41. if not overwrite:
  42. return
  43. fns = set()
  44. for file in sorted(os.listdir(dir_folder)):
  45. if os.path.isfile(os.path.join(dir_folder, file)):
  46. if file.endswith('.npy'):
  47. fns.add(file)
  48. marker_fn = os.path.join(dir_folder, 'group_names_finished.pkl')
  49. with open(marker_fn, 'wb') as f:
  50. pickle.dump(fns, f)
  51. # This function is used by other scripts. Modify it carefully.
  52. def group_trials(dir_folder, name_prefix, overwrite, clear, backup, num_trials=100):
  53. # Get group name.
  54. label_name = name_prefix.split('.')[0]
  55. if label_name == 'ged_matrix':
  56. group_label = 'ged_mats'
  57. elif label_name == 'runtime':
  58. group_label = 'runtimes'
  59. else:
  60. group_label = label_name
  61. name_suffix = name_prefix[len(label_name):]
  62. if label_name == 'ged_matrix':
  63. name_group = dir_folder + 'groups/' + group_label + name_suffix + 'npy'
  64. else:
  65. name_group = dir_folder + 'groups/' + group_label + name_suffix + 'pkl'
  66. if not overwrite and os.path.isfile(name_group):
  67. # Check if all trial files exist.
  68. trials_complete = True
  69. for trial in range(1, num_trials + 1):
  70. file_name = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  71. if not os.path.isfile(file_name):
  72. trials_complete = False
  73. break
  74. else:
  75. # Get data.
  76. data_group = []
  77. for trial in range(1, num_trials + 1):
  78. file_name = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  79. if os.path.isfile(file_name):
  80. with open(file_name, 'rb') as f:
  81. try:
  82. data = pickle.load(f)
  83. except EOFError:
  84. print('EOF Error occurred.')
  85. return
  86. data_group.append(data)
  87. # unpickler = pickle.Unpickler(f)
  88. # data = unpickler.load()
  89. # if not isinstance(data, np.array):
  90. # return
  91. # else:
  92. # data_group.append(data)
  93. else: # Not all trials are completed.
  94. return
  95. # Write groups.
  96. if label_name == 'ged_matrix':
  97. data_group = np.array(data_group)
  98. with open(name_group, 'wb') as f:
  99. np.save(f, data_group)
  100. else:
  101. with open(name_group, 'wb') as f:
  102. pickle.dump(data_group, f)
  103. trials_complete = True
  104. if trials_complete:
  105. # Backup.
  106. if backup:
  107. for trial in range(1, num_trials + 1):
  108. src = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  109. dst = dir_folder + 'backups/' + name_prefix + 'trial_' + str(trial) + '.pkl'
  110. copyfile(src, dst)
  111. # Clear.
  112. if clear:
  113. for trial in range(1, num_trials + 1):
  114. src = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl'
  115. os.remove(src)
  116. def group_all_in_folder(dir_folder, overwrite=False, clear=True, backup=True):
  117. # Create folders.
  118. os.makedirs(dir_folder + 'groups/', exist_ok=True)
  119. if backup:
  120. os.makedirs(dir_folder + 'backups', exist_ok=True)
  121. # Iterate all files.
  122. cur_file_prefix = ''
  123. for file in tqdm(sorted(os.listdir(dir_folder)), desc='Grouping', file=sys.stdout):
  124. if os.path.isfile(os.path.join(dir_folder, file)):
  125. name_prefix = file.split('trial_')[0]
  126. # print(name)
  127. # print(name_prefix)
  128. if name_prefix != cur_file_prefix:
  129. group_trials(dir_folder, name_prefix, overwrite, clear, backup)
  130. cur_file_prefix = name_prefix
  131. if __name__ == '__main__':
  132. # dir_folder = 'outputs/CRIANN/edit_costs.num_sols.ratios.IPFP/'
  133. # group_all_in_folder(dir_folder)
  134. # dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.IPFP/'
  135. # group_all_in_folder(dir_folder)
  136. # dir_folder = 'outputs/CRIANN/edit_costs.max_num_sols.ratios.bipartite/'
  137. # group_all_in_folder(dir_folder)
  138. # dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.bipartite/'
  139. # group_all_in_folder(dir_folder)
  140. dir_folder = 'outputs/edit_costs.real_data.num_sols.ratios.IPFP/groups/'
  141. create_group_marker_file(dir_folder)

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.