#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 29 17:26:43 2020

@author: ljia

This script groups results together into a single file for the sake of faster
searching and loading.
"""
import os
import pickle
import sys
from shutil import copyfile

import numpy as np

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; keep the grouping helpers
    # importable by other scripts when tqdm is not installed.
    def tqdm(iterable, **kwargs):
        """Fallback for ``tqdm.tqdm``: return *iterable* unchanged."""
        return iterable


# Name of the pickle file (stored next to the group files) that holds the set
# of group file names already finished.
_MARKER_FILE_NAME = 'group_names_finished.pkl'


def _trial_file(dir_folder, name_prefix, trial):
    """Return the path of the pickle file of trial number *trial*."""
    return os.path.join(dir_folder, name_prefix + 'trial_' + str(trial) + '.pkl')


def check_group_existence(file_name):
    """Tell whether the group file *file_name* is already available.

    A group counts as existing when its base name is recorded in the marker
    file located in the same folder, or when the file itself is on disk.

    Parameters
    ----------
    file_name : str
        Path of the group file.

    Returns
    -------
    bool
        True if the group is recorded in the marker file or exists on disk.
    """
    folder, name = os.path.split(file_name)
    marker_fn = os.path.join(folder, _MARKER_FILE_NAME)
    if os.path.isfile(marker_fn):
        # NOTE: pickle must only be used on marker files written by this
        # script; never point it at untrusted data.
        with open(marker_fn, 'rb') as f:
            if name in pickle.load(f):
                return True

    return os.path.isfile(file_name)


def update_group_marker(file_name):
    """Record the base name of *file_name* in its folder's marker file.

    The marker file is created when it does not exist yet, and is left
    untouched when the name is already recorded.

    Parameters
    ----------
    file_name : str
        Path of the group file to record.
    """
    # @todo: possible error when several tasks are using this file at the
    # same time.
    folder, name = os.path.split(file_name)
    marker_fn = os.path.join(folder, _MARKER_FILE_NAME)
    if os.path.isfile(marker_fn):
        with open(marker_fn, 'rb') as f:
            fns = pickle.load(f)
        if name in fns:
            return
        fns.add(name)
    else:
        fns = {name}
    with open(marker_fn, 'wb') as f:
        pickle.dump(fns, f)


def create_group_marker_file(dir_folder, overwrite=True):
    """Write a marker file listing every ``.npy`` file found in *dir_folder*.

    Parameters
    ----------
    dir_folder : str
        Folder that is scanned and in which the marker file is written.
    overwrite : bool, optional
        When False the function does nothing at all (even if no marker file
        exists yet). When True (default) the marker file is (re)written.
    """
    if not overwrite:
        return

    fns = {
        file for file in os.listdir(dir_folder)
        if file.endswith('.npy')
        and os.path.isfile(os.path.join(dir_folder, file))
    }

    marker_fn = os.path.join(dir_folder, _MARKER_FILE_NAME)
    with open(marker_fn, 'wb') as f:
        pickle.dump(fns, f)


# This function is used by other scripts. Modify it carefully.
def group_trials(dir_folder, name_prefix, overwrite, clear, backup, num_trials=100):
    """Merge the per-trial pickle files of one experiment into a group file.

    Trial files are named ``<name_prefix>trial_<i>.pkl`` for ``i`` in
    ``1..num_trials`` and live in *dir_folder*. The group file is written to
    ``<dir_folder>/groups/``: as a ``.npy`` array when the label (the part of
    *name_prefix* before the first ``.``) is ``ged_matrix``, as a pickled
    list otherwise. Nothing is done unless every trial file is present.

    Parameters
    ----------
    dir_folder : str
        Folder holding the trial files; a trailing separator is optional.
    name_prefix : str
        Common prefix of the trial file names. The group file extension is
        appended directly to it, so it is expected to end with ``.``.
    overwrite : bool
        Rebuild the group file even if it already exists.
    clear : bool
        Remove the trial files once the group file is available.
    backup : bool
        Copy the trial files to ``<dir_folder>/backups/`` (before clearing).
    num_trials : int, optional
        Number of trials expected for the experiment. Default is 100.

    Returns
    -------
    None
    """
    # Get group name.
    label_name = name_prefix.split('.')[0]
    name_suffix = name_prefix[len(label_name):]
    is_ged_matrix = (label_name == 'ged_matrix')
    if is_ged_matrix:
        group_label = 'ged_mats'
    elif label_name == 'runtime':
        group_label = 'runtimes'
    else:
        group_label = label_name
    dir_groups = os.path.join(dir_folder, 'groups')
    name_group = os.path.join(
        dir_groups,
        group_label + name_suffix + ('npy' if is_ged_matrix else 'pkl'))

    # Not all trials are completed: nothing to group, back up or clear.
    # Checked up front so that no trial is loaded needlessly.
    trial_files = [_trial_file(dir_folder, name_prefix, trial)
                   for trial in range(1, num_trials + 1)]
    if not all(os.path.isfile(fn) for fn in trial_files):
        return

    if overwrite or not os.path.isfile(name_group):
        # Get data.
        data_group = []
        for file_name in trial_files:
            with open(file_name, 'rb') as f:
                try:
                    # NOTE: only load trial files produced by trusted runs.
                    data = pickle.load(f)
                except EOFError:
                    # Best effort: an empty / incomplete trial file aborts
                    # the grouping without touching anything.
                    print('EOF Error occurred.')
                    return
            data_group.append(data)

        # Write groups. The folder may not exist yet when this function is
        # called directly by another script.
        os.makedirs(dir_groups, exist_ok=True)
        with open(name_group, 'wb') as f:
            if is_ged_matrix:
                np.save(f, np.array(data_group))
            else:
                pickle.dump(data_group, f)

    # Backup.
    if backup:
        dir_backups = os.path.join(dir_folder, 'backups')
        os.makedirs(dir_backups, exist_ok=True)
        for trial, src in enumerate(trial_files, start=1):
            copyfile(src, _trial_file(dir_backups, name_prefix, trial))

    # Clear.
    if clear:
        for src in trial_files:
            os.remove(src)


def group_all_in_folder(dir_folder, overwrite=False, clear=True, backup=True,
                        num_trials=100):
    """Group the trial files of every experiment found in *dir_folder*.

    The ``groups`` (and, when *backup* is set, ``backups``) sub-folders are
    created, then ``group_trials`` is called once per distinct file name
    prefix (the part of a file name before ``trial_``).

    Parameters
    ----------
    dir_folder : str
        Folder holding the trial files; a trailing separator is optional.
    overwrite : bool, optional
        Rebuild group files that already exist. Default is False.
    clear : bool, optional
        Remove trial files once grouped. Default is True.
    backup : bool, optional
        Copy trial files to the ``backups`` sub-folder. Default is True.
    num_trials : int, optional
        Number of trials expected per experiment. Default is 100.
    """
    # Create folders.
    os.makedirs(os.path.join(dir_folder, 'groups'), exist_ok=True)
    if backup:
        os.makedirs(os.path.join(dir_folder, 'backups'), exist_ok=True)

    # Iterate all files. The listing is sorted so that the trial files of
    # one experiment are adjacent and are grouped only once.
    cur_file_prefix = ''
    for file in tqdm(sorted(os.listdir(dir_folder)), desc='Grouping', file=sys.stdout):
        # Files that are not trial files (e.g. marker files) have no prefix
        # to group, and files cleared by a previous grouping are gone.
        if 'trial_' not in file:
            continue
        if not os.path.isfile(os.path.join(dir_folder, file)):
            continue
        name_prefix = file.split('trial_')[0]
        if name_prefix != cur_file_prefix:
            group_trials(dir_folder, name_prefix, overwrite, clear, backup,
                         num_trials=num_trials)
            cur_file_prefix = name_prefix


if __name__ == '__main__':
    # dir_folder = 'outputs/CRIANN/edit_costs.num_sols.ratios.IPFP/'
    # group_all_in_folder(dir_folder)

    # dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.IPFP/'
    # group_all_in_folder(dir_folder)

    # dir_folder = 'outputs/CRIANN/edit_costs.max_num_sols.ratios.bipartite/'
    # group_all_in_folder(dir_folder)

    # dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.bipartite/'
    # group_all_in_folder(dir_folder)

    dir_folder = 'outputs/CRIANN/edit_costs.real_data.num_sols.ratios.IPFP/groups/'
    create_group_marker_file(dir_folder)