diff --git a/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py index 8b6bbd3..1c935d9 100644 --- a/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py +++ b/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py @@ -12,10 +12,10 @@ import multiprocessing import pickle import logging from gklearn.ged.util import compute_geds -import numpy as np import time from utils import get_dataset import sys +from group_results import group_trials def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial): @@ -92,11 +92,12 @@ def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio): ged_mats.append(ged_mat) runtimes.append(runtime) -# save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) -# with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f: -# np.save(f, np.array(ged_mats)) -# with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f: -# pickle.dump(runtime, f) + # Group trials and Remove single files. + name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.' + name_prefix = 'ged_matrix' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + name_prefix = 'runtime' + name_middle + group_trials(save_dir, name_prefix, True, True, False) def results_for_a_dataset(ds_name): diff --git a/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py index 122b4f0..7277e47 100644 --- a/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py +++ b/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py @@ -12,15 +12,19 @@ import multiprocessing import pickle import logging from gklearn.ged.util import compute_geds -import numpy as np import time from utils import get_dataset import sys +from group_results import group_trials def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial): save_file_suffix = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) + + # Return if the file exists. + if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): + return None, None """**2. Set parameters.**""" @@ -39,8 +43,8 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial): } edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1] -# edit_cost_constants = [item * 0.01 for item in edit_cost_constants] -# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb")) +# edit_cost_constants = [item * 0.01 for item in edit_cost_constants] +# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb")) options = ged_options.copy() options['edit_cost_constants'] = edit_cost_constants @@ -75,6 +79,12 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial): def save_trials_as_group(dataset, ds_name, num_solutions, ratio): + # Return if the group file exists. + name_middle = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.' + name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' + if os.path.isfile(name_group): + return + ged_mats = [] runtimes = [] for trial in range(1, 101): @@ -84,24 +94,35 @@ def save_trials_as_group(dataset, ds_name, num_solutions, ratio): ged_mats.append(ged_mat) runtimes.append(runtime) -# save_file_suffix = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) -# with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f: -# np.save(f, np.array(ged_mats)) -# with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f: -# pickle.dump(runtime, f) + # Group trials and Remove single files. + name_prefix = 'ged_matrix' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + name_prefix = 'runtime' + name_middle + group_trials(save_dir, name_prefix, True, True, False) def results_for_a_dataset(ds_name): """**1. Get dataset.**""" dataset = get_dataset(ds_name) - for num_solutions in [1, 20, 40, 60, 80, 100]: + for num_solutions in num_solutions_list: print() print('# of solutions:', num_solutions) - for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]: + for ratio in ratio_list: print() print('Ratio:', ratio) save_trials_as_group(dataset, ds_name, num_solutions, ratio) + + +def get_param_lists(ds_name): + if ds_name == 'AIDS_symb': + num_solutions_list = [1, 20, 40, 60, 80, 100] + ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] + else: + num_solutions_list = [1, 20, 40, 60, 80, 100] + ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] + + return num_solutions_list, ratio_list if __name__ == '__main__': @@ -117,4 +138,5 @@ if __name__ == '__main__': for ds_name in ds_name_list: print() print('Dataset:', ds_name) + num_solutions_list, ratio_list = get_param_lists(ds_name) results_for_a_dataset(ds_name) diff --git a/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py index c21ec93..e273377 100644 --- a/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py +++ b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py @@ -12,10 +12,10 @@ import multiprocessing import pickle import logging from gklearn.ged.util import compute_geds -import numpy as np import time from utils import get_dataset import sys +from group_results import group_trials def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial): @@ -87,11 +87,12 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio): ged_mats.append(ged_mat) runtimes.append(runtime) -# save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) -# with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f: -# np.save(f, np.array(ged_mats)) -# with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f: -# pickle.dump(runtime, f) + # Group trials and Remove single files. + name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.' + name_prefix = 'ged_matrix' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + name_prefix = 'runtime' + name_middle + group_trials(save_dir, name_prefix, True, True, False) def results_for_a_dataset(ds_name): diff --git a/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py index aef5b0b..6b76457 100644 --- a/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py +++ b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py @@ -12,10 +12,10 @@ import multiprocessing import pickle import logging from gklearn.ged.util import compute_geds -import numpy as np import time from utils import get_dataset import sys +from group_results import group_trials def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial): @@ -92,11 +92,12 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio): ged_mats.append(ged_mat) runtimes.append(runtime) -# save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) -# with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f: -# np.save(f, np.array(ged_mats)) -# with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f: -# pickle.dump(runtime, f) + # Group trials and Remove single files. + name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.' + name_prefix = 'ged_matrix' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + name_prefix = 'runtime' + name_middle + group_trials(save_dir, name_prefix, True, True, False) def results_for_a_dataset(ds_name): diff --git a/gklearn/experiments/ged/stability/group_results.py b/gklearn/experiments/ged/stability/group_results.py index 48ea68d..e1f999e 100644 --- a/gklearn/experiments/ged/stability/group_results.py +++ b/gklearn/experiments/ged/stability/group_results.py @@ -16,6 +16,7 @@ from tqdm import tqdm import sys +# This function is used by other scripts. Modify it carefully. def group_trials(dir_folder, name_prefix, override, clear, backup): # Get group name. @@ -47,8 +48,20 @@ def group_trials(dir_folder, name_prefix, override, clear, backup): file_name = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl' if os.path.isfile(file_name): with open(file_name, 'rb') as f: - data = pickle.load(f) + try: + data = pickle.load(f) + except EOFError: + print('EOF Error occurred.') + return data_group.append(data) + +# unpickler = pickle.Unpickler(f) +# data = unpickler.load() +# if not isinstance(data, np.array): +# return +# else: +# data_group.append(data) + else: # Not all trials are completed. return @@ -81,11 +94,9 @@ def group_trials(dir_folder, name_prefix, override, clear, backup): def group_all_in_folder(dir_folder, override=False, clear=True, backup=True): # Create folders. - if not os.path.exists(dir_folder + 'groups/'): - os.makedirs(dir_folder + 'groups/') + os.makedirs(dir_folder + 'groups/', exist_ok=True) if backup: - if not os.path.exists(dir_folder + 'backups'): - os.makedirs(dir_folder + 'backups') + os.makedirs(dir_folder + 'backups', exist_ok=True) # Iterate all files. cur_file_prefix = '' @@ -105,4 +116,10 @@ if __name__ == '__main__': group_all_in_folder(dir_folder) dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.IPFP/' + group_all_in_folder(dir_folder) + + dir_folder = 'outputs/CRIANN/edit_costs.max_num_sols.ratios.bipartite/' + group_all_in_folder(dir_folder) + + dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.bipartite/' group_all_in_folder(dir_folder) \ No newline at end of file diff --git a/gklearn/experiments/ged/stability/run_job_edit_costs.nums_sols.ratios.IPFP.py b/gklearn/experiments/ged/stability/run_job_edit_costs.nums_sols.ratios.IPFP.py new file mode 100644 index 0000000..6939a06 --- /dev/null +++ b/gklearn/experiments/ged/stability/run_job_edit_costs.nums_sols.ratios.IPFP.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Nov 3 20:23:25 2020 + +@author: ljia +""" +import os +import re + + +def get_job_script(arg): + script = r""" +#!/bin/bash + +#SBATCH --exclusive +#SBATCH --job-name="st.""" + arg + r""".IPFP" +#SBATCH --partition=tlong +#SBATCH --mail-type=ALL +#SBATCH --mail-user=jajupmochi@gmail.com +#SBATCH --output="outputs/output_edit_costs.nums_sols.ratios.IPFP.""" + arg + """.txt" +#SBATCH --error="errors/error_edit_costs.nums_sols.ratios.IPFP.""" + arg + """.txt" +# +#SBATCH --ntasks=1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=1 +#SBATCH --time=300:00:00 +#SBATCH --mem-per-cpu=4000 + +srun hostname +srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability +srun python3 edit_costs.nums_sols.ratios.IPFP.py """ + arg + script = script.strip() + script = re.sub('\n\t+', '\n', script) + script = re.sub('\n +', '\n', script) + + return script + +if __name__ == '__main__': + ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb'] + for ds_name in [ds_list[i] for i in [0, 3]]: + job_script = get_job_script(ds_name) + command = 'sbatch <