@@ -12,10 +12,10 @@ import multiprocessing | |||
import pickle | |||
import logging | |||
from gklearn.ged.util import compute_geds | |||
import numpy as np | |||
import time | |||
from utils import get_dataset | |||
import sys | |||
from group_results import group_trials | |||
def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial): | |||
@@ -92,11 +92,12 @@ def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio): | |||
ged_mats.append(ged_mat) | |||
runtimes.append(runtime) | |||
# save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) | |||
# with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f: | |||
# np.save(f, np.array(ged_mats)) | |||
# with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f: | |||
# pickle.dump(runtime, f) | |||
# Group trials and Remove single files. | |||
name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.' | |||
name_prefix = 'ged_matrix' + name_middle | |||
group_trials(save_dir, name_prefix, True, True, False) | |||
name_prefix = 'runtime' + name_middle | |||
group_trials(save_dir, name_prefix, True, True, False) | |||
def results_for_a_dataset(ds_name): | |||
@@ -12,15 +12,19 @@ import multiprocessing | |||
import pickle | |||
import logging | |||
from gklearn.ged.util import compute_geds | |||
import numpy as np | |||
import time | |||
from utils import get_dataset | |||
import sys | |||
from group_results import group_trials | |||
def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial): | |||
save_file_suffix = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) | |||
# Return if the file exists. | |||
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): | |||
return None, None | |||
"""**2. Set parameters.**""" | |||
@@ -39,8 +43,8 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial): | |||
} | |||
edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1] | |||
# edit_cost_constants = [item * 0.01 for item in edit_cost_constants] | |||
# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb")) | |||
# edit_cost_constants = [item * 0.01 for item in edit_cost_constants] | |||
# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb")) | |||
options = ged_options.copy() | |||
options['edit_cost_constants'] = edit_cost_constants | |||
@@ -75,6 +79,12 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial): | |||
def save_trials_as_group(dataset, ds_name, num_solutions, ratio): | |||
# Return if the group file exists. | |||
name_middle = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.' | |||
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' | |||
if os.path.isfile(name_group): | |||
return | |||
ged_mats = [] | |||
runtimes = [] | |||
for trial in range(1, 101): | |||
@@ -84,24 +94,35 @@ def save_trials_as_group(dataset, ds_name, num_solutions, ratio): | |||
ged_mats.append(ged_mat) | |||
runtimes.append(runtime) | |||
# save_file_suffix = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) | |||
# with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f: | |||
# np.save(f, np.array(ged_mats)) | |||
# with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f: | |||
# pickle.dump(runtime, f) | |||
# Group trials and Remove single files. | |||
name_prefix = 'ged_matrix' + name_middle | |||
group_trials(save_dir, name_prefix, True, True, False) | |||
name_prefix = 'runtime' + name_middle | |||
group_trials(save_dir, name_prefix, True, True, False) | |||
def results_for_a_dataset(ds_name):
    """Group the trial results of one dataset over the whole parameter grid.

    Loads the dataset with ``get_dataset`` and calls ``save_trials_as_group``
    once for every (number of solutions, ratio) pair returned by
    ``get_param_lists(ds_name)``, printing the current parameters as it goes.

    Parameters
    ----------
    ds_name : str
        Dataset name, forwarded to ``get_dataset``, ``get_param_lists`` and
        ``save_trials_as_group``.
    """
    # 1. Get dataset.
    dataset = get_dataset(ds_name)

    # Resolve the grid locally instead of reading module-level names that are
    # only assigned when this file is executed as a script.
    num_solutions_list, ratio_list = get_param_lists(ds_name)

    for num_solutions in num_solutions_list:
        print()
        print('# of solutions:', num_solutions)
        for ratio in ratio_list:
            print()
            print('Ratio:', ratio)
            save_trials_as_group(dataset, ds_name, num_solutions, ratio)
def get_param_lists(ds_name):
    """Return the parameter grid to explore for a dataset.

    Parameters
    ----------
    ds_name : str
        Dataset name. ``'AIDS_symb'`` has its own entry; every other name
        uses the fallback. Both entries currently hold the same values.

    Returns
    -------
    tuple
        ``(num_solutions_list, ratio_list)``, two freshly built lists.
    """
    # Dataset-specific grid.
    if ds_name == 'AIDS_symb':
        return (
            [1, 20, 40, 60, 80, 100],
            [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9],
        )

    # Fallback grid for all remaining datasets.
    return (
        [1, 20, 40, 60, 80, 100],
        [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9],
    )
if __name__ == '__main__': | |||
@@ -117,4 +138,5 @@ if __name__ == '__main__': | |||
for ds_name in ds_name_list: | |||
print() | |||
print('Dataset:', ds_name) | |||
num_solutions_list, ratio_list = get_param_lists(ds_name) | |||
results_for_a_dataset(ds_name) |
@@ -12,10 +12,10 @@ import multiprocessing | |||
import pickle | |||
import logging | |||
from gklearn.ged.util import compute_geds | |||
import numpy as np | |||
import time | |||
from utils import get_dataset | |||
import sys | |||
from group_results import group_trials | |||
def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial): | |||
@@ -87,11 +87,12 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio): | |||
ged_mats.append(ged_mat) | |||
runtimes.append(runtime) | |||
# save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) | |||
# with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f: | |||
# np.save(f, np.array(ged_mats)) | |||
# with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f: | |||
# pickle.dump(runtime, f) | |||
# Group trials and Remove single files. | |||
name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.' | |||
name_prefix = 'ged_matrix' + name_middle | |||
group_trials(save_dir, name_prefix, True, True, False) | |||
name_prefix = 'runtime' + name_middle | |||
group_trials(save_dir, name_prefix, True, True, False) | |||
def results_for_a_dataset(ds_name): | |||
@@ -12,10 +12,10 @@ import multiprocessing | |||
import pickle | |||
import logging | |||
from gklearn.ged.util import compute_geds | |||
import numpy as np | |||
import time | |||
from utils import get_dataset | |||
import sys | |||
from group_results import group_trials | |||
def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial): | |||
@@ -92,11 +92,12 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio): | |||
ged_mats.append(ged_mat) | |||
runtimes.append(runtime) | |||
# save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) | |||
# with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f: | |||
# np.save(f, np.array(ged_mats)) | |||
# with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f: | |||
# pickle.dump(runtime, f) | |||
# Group trials and Remove single files. | |||
name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.' | |||
name_prefix = 'ged_matrix' + name_middle | |||
group_trials(save_dir, name_prefix, True, True, False) | |||
name_prefix = 'runtime' + name_middle | |||
group_trials(save_dir, name_prefix, True, True, False) | |||
def results_for_a_dataset(ds_name): | |||
@@ -16,6 +16,7 @@ from tqdm import tqdm | |||
import sys | |||
# This function is used by other scripts. Modify it carefully. | |||
def group_trials(dir_folder, name_prefix, override, clear, backup): | |||
# Get group name. | |||
@@ -47,8 +48,20 @@ def group_trials(dir_folder, name_prefix, override, clear, backup): | |||
file_name = dir_folder + name_prefix + 'trial_' + str(trial) + '.pkl' | |||
if os.path.isfile(file_name): | |||
with open(file_name, 'rb') as f: | |||
data = pickle.load(f) | |||
try: | |||
data = pickle.load(f) | |||
except EOFError: | |||
print('EOF Error occurred.') | |||
return | |||
data_group.append(data) | |||
# unpickler = pickle.Unpickler(f) | |||
# data = unpickler.load() | |||
# if not isinstance(data, np.array): | |||
# return | |||
# else: | |||
# data_group.append(data) | |||
else: # Not all trials are completed. | |||
return | |||
@@ -81,11 +94,9 @@ def group_trials(dir_folder, name_prefix, override, clear, backup): | |||
def group_all_in_folder(dir_folder, override=False, clear=True, backup=True): | |||
# Create folders. | |||
if not os.path.exists(dir_folder + 'groups/'): | |||
os.makedirs(dir_folder + 'groups/') | |||
os.makedirs(dir_folder + 'groups/', exist_ok=True) | |||
if backup: | |||
if not os.path.exists(dir_folder + 'backups'): | |||
os.makedirs(dir_folder + 'backups') | |||
os.makedirs(dir_folder + 'backups', exist_ok=True) | |||
# Iterate all files. | |||
cur_file_prefix = '' | |||
@@ -105,4 +116,10 @@ if __name__ == '__main__': | |||
group_all_in_folder(dir_folder) | |||
dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.IPFP/' | |||
group_all_in_folder(dir_folder) | |||
dir_folder = 'outputs/CRIANN/edit_costs.max_num_sols.ratios.bipartite/' | |||
group_all_in_folder(dir_folder) | |||
dir_folder = 'outputs/CRIANN/edit_costs.repeats.ratios.bipartite/' | |||
group_all_in_folder(dir_folder) |
@@ -0,0 +1,47 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Tue Nov 3 20:23:25 2020 | |||
@author: ljia | |||
""" | |||
import os | |||
import re | |||
def get_job_script(arg):
    """Build the Slurm batch script for one run of the IPFP experiment.

    Parameters
    ----------
    arg : str
        Text inserted into the job name and into the output/error file
        names, and passed as the command-line argument of
        ``edit_costs.nums_sols.ratios.IPFP.py``. The caller in this file
        passes a dataset name.

    Returns
    -------
    str
        The script text: one directive or command per line, no leading
        indentation, no surrounding whitespace.
    """
    work_dir = '/home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability'
    lines = [
        '#!/bin/bash',
        '#SBATCH --exclusive',
        '#SBATCH --job-name="st.' + arg + '.IPFP"',
        '#SBATCH --partition=tlong',
        '#SBATCH --mail-type=ALL',
        '#SBATCH --mail-user=jajupmochi@gmail.com',
        '#SBATCH --output="outputs/output_edit_costs.nums_sols.ratios.IPFP.' + arg + '.txt"',
        '#SBATCH --error="errors/error_edit_costs.nums_sols.ratios.IPFP.' + arg + '.txt"',
        '#',
        '#SBATCH --ntasks=1',
        '#SBATCH --nodes=1',
        '#SBATCH --cpus-per-task=1',
        '#SBATCH --time=300:00:00',
        '#SBATCH --mem-per-cpu=4000',
        'srun hostname',
        # The directory change is done by the batch shell itself: a ``cd``
        # launched through ``srun`` only affects that job step and would not
        # carry over to the ``srun python3`` step below.
        'cd ' + work_dir,
        'srun python3 edit_costs.nums_sols.ratios.IPFP.py ' + arg,
    ]
    # strip() keeps the result free of surrounding whitespace, as before.
    return '\n'.join(lines).strip()
if __name__ == '__main__':
    ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
    # Only the datasets at these positions of ds_list are submitted.
    selected_positions = (0, 3)
    for position in selected_positions:
        ds_name = ds_list[position]
        # Hand the generated script to sbatch through a shell here-document.
        command = 'sbatch <<EOF\n' + get_job_script(ds_name) + '\nEOF'
        os.system(command)