diff --git a/gklearn/experiments/ged/stability/edit_costs.max_num_sols.N.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.max_num_sols.N.bipartite.py new file mode 100644 index 0000000..fd9e49e --- /dev/null +++ b/gklearn/experiments/ged/stability/edit_costs.max_num_sols.N.bipartite.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Oct 20 11:48:02 2020 + +@author: ljia +""" +# This script tests the influence of the ratios between node costs and edge costs on the stability of the GED computation, where the base edit costs are [1, 1, 1, 1, 1, 1]. + +import os +import multiprocessing +import pickle +import logging +from gklearn.ged.util import compute_geds +import time +import sys +from group_results import group_trials + + +def generate_graphs(): + from gklearn.utils.graph_synthesizer import GraphSynthesizer + gsyzer = GraphSynthesizer() + graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False) + return graphs + + +def xp_compute_ged_matrix(graphs, N, max_num_solutions, ratio, trial): + + save_file_suffix = '.' + str(N) + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) + + # Return if the file exists. + if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): + return None, None + + """**2. Set parameters.**""" + + # Parameters for GED computation. + ged_options = {'method': 'BIPARTITE', # use BIPARTITE heuristic. + # 'initialization_method': 'RANDOM', # or 'NODE', etc. (for GEDEnv) + 'lsape_model': 'ECBP', # + # ??when bigger than 1, then the method is considered mIPFP. + # the actual number of computed solutions might be smaller than the specified value + 'max_num_solutions': max_num_solutions, + 'edit_cost': 'CONSTANT', # use CONSTANT cost. + 'greedy_method': 'BASIC', # + # the distance between non-symbolic node/edge labels is computed by euclidean distance. 
+ 'attr_distance': 'euclidean', + 'optimal': True, # if TRUE, the option --greedy-method has no effect + # parallel threads. Do not work if mpg_options['parallel'] = False. + 'threads': multiprocessing.cpu_count(), + 'centrality_method': 'NONE', + 'centrality_weight': 0.7, + 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES' + } + + edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1] +# edit_cost_constants = [item * 0.01 for item in edit_cost_constants] +# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb")) + + options = ged_options.copy() + options['edit_cost_constants'] = edit_cost_constants + options['node_labels'] = [] + options['edge_labels'] = [] + options['node_attrs'] = [] + options['edge_attrs'] = [] + parallel = True # if num_solutions == 1 else False + + """**5. Compute GED matrix.**""" + ged_mat = 'error' + runtime = 0 + try: + time0 = time.time() + ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=1, parallel=parallel, verbose=True) + runtime = time.time() - time0 + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = save_dir + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception(save_file_suffix) + print(repr(exp)) + + """**6. Get results.**""" + + with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f: + pickle.dump(ged_mat, f) + with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f: + pickle.dump(runtime, f) + + return ged_mat, runtime + + +def save_trials_as_group(graphs, N, max_num_solutions, ratio): + # Return if the group file exists. + name_middle = '.' + str(N) + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.' 
+ name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' + if os.path.isfile(name_group): + return + + ged_mats = [] + runtimes = [] + for trial in range(1, 101): + print() + print('Trial:', trial) + ged_mat, runtime = xp_compute_ged_matrix(graphs, N, max_num_solutions, ratio, trial) + ged_mats.append(ged_mat) + runtimes.append(runtime) + + # Group trials and Remove single files. + name_prefix = 'ged_matrix' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + name_prefix = 'runtime' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + + +def results_for_a_ratio(ratio): + + for N in N_list: + print() + print('# of graphs:', N) + for max_num_solutions in [1, 20, 40, 60, 80, 100]: + print() + print('Max # of solutions:', max_num_solutions) + save_trials_as_group(graphs[:N], N, max_num_solutions, ratio) + + +if __name__ == '__main__': + if len(sys.argv) > 1: + N_list = [int(i) for i in sys.argv[1:]] + else: + N_list = [10, 50, 100] + + # Generate graphs. + graphs = generate_graphs() + + save_dir = 'outputs/edit_costs.max_num_sols.N.bipartite/' + os.makedirs(save_dir, exist_ok=True) + os.makedirs(save_dir + 'groups/', exist_ok=True) + + for ratio in [10, 1, 0.1]: + print() + print('Ratio:', ratio) + results_for_a_ratio(ratio) diff --git a/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py index 1c935d9..1f01fd5 100644 --- a/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py +++ b/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py @@ -22,8 +22,9 @@ def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial): save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) - """**1. Get dataset.**""" - dataset = get_dataset(ds_name) + # Return if the file exists. 
+ if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): + return None, None """**2. Set parameters.**""" @@ -83,6 +84,12 @@ def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial): def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio): + # Return if the group file exists. + name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.' + name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' + if os.path.isfile(name_group): + return + ged_mats = [] runtimes = [] for trial in range(1, 101): @@ -93,25 +100,35 @@ def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio): runtimes.append(runtime) # Group trials and Remove single files. - name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.' name_prefix = 'ged_matrix' + name_middle group_trials(save_dir, name_prefix, True, True, False) name_prefix = 'runtime' + name_middle group_trials(save_dir, name_prefix, True, True, False) - - + + def results_for_a_dataset(ds_name): """**1. 
Get dataset.**""" dataset = get_dataset(ds_name) - for max_num_solutions in [1, 20, 40, 60, 80, 100]: + for max_num_solutions in mnum_solutions_list: print() print('Max # of solutions:', max_num_solutions) - for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]: + for ratio in ratio_list: print() print('Ratio:', ratio) save_trials_as_group(dataset, ds_name, max_num_solutions, ratio) + + +def get_param_lists(ds_name): + if ds_name == 'AIDS_symb': + mnum_solutions_list = [1, 20, 40, 60, 80, 100] + ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] + else: + mnum_solutions_list = [1, 20, 40, 60, 80, 100] + ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] + return mnum_solutions_list, ratio_list + if __name__ == '__main__': if len(sys.argv) > 1: @@ -126,4 +143,5 @@ if __name__ == '__main__': for ds_name in ds_name_list: print() print('Dataset:', ds_name) + mnum_solutions_list, ratio_list = get_param_lists(ds_name) results_for_a_dataset(ds_name) \ No newline at end of file diff --git a/gklearn/experiments/ged/stability/edit_costs.nums_sols.N.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.nums_sols.N.IPFP.py new file mode 100644 index 0000000..d65358a --- /dev/null +++ b/gklearn/experiments/ged/stability/edit_costs.nums_sols.N.IPFP.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Oct 20 11:48:02 2020 + +@author: ljia +""" +# This script tests the influence of the ratios between node costs and edge costs on the stability of the GED computation, where the base edit costs are [1, 1, 1, 1, 1, 1]. 
+ +import os +import multiprocessing +import pickle +import logging +from gklearn.ged.util import compute_geds +import time +import sys +from group_results import group_trials + + +def generate_graphs(): + from gklearn.utils.graph_synthesizer import GraphSynthesizer + gsyzer = GraphSynthesizer() + graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False) + return graphs + + +def xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial): + + save_file_suffix = '.' + str(N) + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) + + # Return if the file exists. + if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): + return None, None + + """**2. Set parameters.**""" + + # Parameters for GED computation. + ged_options = {'method': 'IPFP', # use IPFP heuristic. + 'initialization_method': 'RANDOM', # or 'NODE', etc. + # when bigger than 1, then the method is considered mIPFP. + 'initial_solutions': int(num_solutions * 4), + 'edit_cost': 'CONSTANT', # use CONSTANT cost. + # the distance between non-symbolic node/edge labels is computed by euclidean distance. + 'attr_distance': 'euclidean', + 'ratio_runs_from_initial_solutions': 0.25, + # parallel threads. Does not work if mpg_options['parallel'] = False. + 'threads': multiprocessing.cpu_count(), + 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES' + } + + edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1] +# edit_cost_constants = [item * 0.01 for item in edit_cost_constants] +# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb")) + + options = ged_options.copy() + options['edit_cost_constants'] = edit_cost_constants + options['node_labels'] = [] + options['edge_labels'] = [] + options['node_attrs'] = [] + options['edge_attrs'] = [] + parallel = True # if num_solutions == 1 else False + + """**5. 
Compute GED matrix.**""" + ged_mat = 'error' + runtime = 0 + try: + time0 = time.time() + ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=1, parallel=parallel, verbose=True) + runtime = time.time() - time0 + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = save_dir + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception(save_file_suffix) + print(repr(exp)) + + """**6. Get results.**""" + + with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f: + pickle.dump(ged_mat, f) + with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f: + pickle.dump(runtime, f) + + return ged_mat, runtime + + +def save_trials_as_group(graphs, N, num_solutions, ratio): + # Return if the group file exists. + name_middle = '.' + str(N) + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.' + name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' + if os.path.isfile(name_group): + return + + ged_mats = [] + runtimes = [] + for trial in range(1, 101): + print() + print('Trial:', trial) + ged_mat, runtime = xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial) + ged_mats.append(ged_mat) + runtimes.append(runtime) + + # Group trials and Remove single files. + name_prefix = 'ged_matrix' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + name_prefix = 'runtime' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + + +def results_for_a_ratio(ratio): + + for N in N_list: + print() + print('# of graphs:', N) + for num_solutions in [1, 20, 40, 60, 80, 100]: + print() + print('# of solutions:', num_solutions) + save_trials_as_group(graphs[:N], N, num_solutions, ratio) + + +if __name__ == '__main__': + if len(sys.argv) > 1: + N_list = [int(i) for i in sys.argv[1:]] + else: + N_list = [10, 50, 100] + + # Generate graphs. 
+ graphs = generate_graphs() + + save_dir = 'outputs/edit_costs.num_sols.N.IPFP/' + os.makedirs(save_dir, exist_ok=True) + os.makedirs(save_dir + 'groups/', exist_ok=True) + + for ratio in [10, 1, 0.1]: + print() + print('Ratio:', ratio) + results_for_a_ratio(ratio) diff --git a/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py index 7277e47..710213a 100644 --- a/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py +++ b/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py @@ -59,7 +59,7 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial): runtime = 0 try: time0 = time.time() - ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, options=options, parallel=parallel, verbose=True) + ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, options=options, repeats=1, parallel=parallel, verbose=True) runtime = time.time() - time0 except Exception as exp: print('An exception occured when running this experiment:') @@ -74,9 +74,9 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial): pickle.dump(ged_mat, f) with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f: pickle.dump(runtime, f) - + return ged_mat, runtime - + def save_trials_as_group(dataset, ds_name, num_solutions, ratio): # Return if the group file exists. @@ -99,8 +99,8 @@ def save_trials_as_group(dataset, ds_name, num_solutions, ratio): group_trials(save_dir, name_prefix, True, True, False) name_prefix = 'runtime' + name_middle group_trials(save_dir, name_prefix, True, True, False) - - + + def results_for_a_dataset(ds_name): """**1. 
Get dataset.**""" dataset = get_dataset(ds_name) diff --git a/gklearn/experiments/ged/stability/edit_costs.repeats.N.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.repeats.N.IPFP.py new file mode 100644 index 0000000..6f6215e --- /dev/null +++ b/gklearn/experiments/ged/stability/edit_costs.repeats.N.IPFP.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Oct 20 11:48:02 2020 + +@author: ljia +""" +# This script tests the influence of the ratios between node costs and edge costs on the stability of the GED computation, where the base edit costs are [1, 1, 1, 1, 1, 1]. + +import os +import multiprocessing +import pickle +import logging +from gklearn.ged.util import compute_geds +import time +import sys +from group_results import group_trials + + +def generate_graphs(): + from gklearn.utils.graph_synthesizer import GraphSynthesizer + gsyzer = GraphSynthesizer() + graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False) + return graphs + + +def xp_compute_ged_matrix(graphs, N, repeats, ratio, trial): + + save_file_suffix = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) + + # Return if the file exists. + if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): + return None, None + + """**2. Set parameters.**""" + + # Parameters for GED computation. + ged_options = {'method': 'IPFP', # use IPFP heuristic. + 'initialization_method': 'RANDOM', # or 'NODE', etc. + # when bigger than 1, then the method is considered mIPFP. + 'initial_solutions': 1, + 'edit_cost': 'CONSTANT', # use CONSTANT cost. + # the distance between non-symbolic node/edge labels is computed by euclidean distance. + 'attr_distance': 'euclidean', + 'ratio_runs_from_initial_solutions': 1, + # parallel threads. Does not work if mpg_options['parallel'] = False. 
+ 'threads': multiprocessing.cpu_count(), + 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES' + } + + edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1] +# edit_cost_constants = [item * 0.01 for item in edit_cost_constants] +# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb")) + + options = ged_options.copy() + options['edit_cost_constants'] = edit_cost_constants + options['node_labels'] = [] + options['edge_labels'] = [] + options['node_attrs'] = [] + options['edge_attrs'] = [] + parallel = True # if num_solutions == 1 else False + + """**5. Compute GED matrix.**""" + ged_mat = 'error' + runtime = 0 + try: + time0 = time.time() + ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=repeats, parallel=parallel, verbose=True) + runtime = time.time() - time0 + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = save_dir + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception(save_file_suffix) + print(repr(exp)) + + """**6. Get results.**""" + + with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f: + pickle.dump(ged_mat, f) + with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f: + pickle.dump(runtime, f) + + return ged_mat, runtime + + +def save_trials_as_group(graphs, N, repeats, ratio): + # Return if the group file exists. + name_middle = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.' + name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' + if os.path.isfile(name_group): + return + + ged_mats = [] + runtimes = [] + for trial in range(1, 101): + print() + print('Trial:', trial) + ged_mat, runtime = xp_compute_ged_matrix(graphs, N, repeats, ratio, trial) + ged_mats.append(ged_mat) + runtimes.append(runtime) + + # Group trials and Remove single files. 
+ name_prefix = 'ged_matrix' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + name_prefix = 'runtime' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + + +def results_for_a_ratio(ratio): + + for N in N_list: + print() + print('# of graphs:', N) + for repeats in [1, 20, 40, 60, 80, 100]: + print() + print('Repeats:', repeats) + save_trials_as_group(graphs[:N], N, repeats, ratio) + + +if __name__ == '__main__': + if len(sys.argv) > 1: + N_list = [int(i) for i in sys.argv[1:]] + else: + N_list = [10, 50, 100] + + # Generate graphs. + graphs = generate_graphs() + + save_dir = 'outputs/edit_costs.repeats.N.IPFP/' + os.makedirs(save_dir, exist_ok=True) + os.makedirs(save_dir + 'groups/', exist_ok=True) + + for ratio in [10, 1, 0.1]: + print() + print('Ratio:', ratio) + results_for_a_ratio(ratio) diff --git a/gklearn/experiments/ged/stability/edit_costs.repeats.N.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.repeats.N.bipartite.py new file mode 100644 index 0000000..64984de --- /dev/null +++ b/gklearn/experiments/ged/stability/edit_costs.repeats.N.bipartite.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Oct 20 11:48:02 2020 + +@author: ljia +""" +# This script tests the influence of the ratios between node costs and edge costs on the stability of the GED computation, where the base edit costs are [1, 1, 1, 1, 1, 1]. + +import os +import multiprocessing +import pickle +import logging +from gklearn.ged.util import compute_geds +import time +import sys +from group_results import group_trials + + +def generate_graphs(): + from gklearn.utils.graph_synthesizer import GraphSynthesizer + gsyzer = GraphSynthesizer() + graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False) + return graphs + + +def xp_compute_ged_matrix(graphs, N, repeats, ratio, trial): + + save_file_suffix = '.' 
+ str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) + + # Return if the file exists. + if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): + return None, None + + """**2. Set parameters.**""" + + # Parameters for GED computation. + ged_options = {'method': 'BIPARTITE', # use BIPARTITE heuristic. + # 'initialization_method': 'RANDOM', # or 'NODE', etc. (for GEDEnv) + 'lsape_model': 'ECBP', # + # ??when bigger than 1, then the method is considered mIPFP. + # the actual number of computed solutions might be smaller than the specified value + 'max_num_solutions': 1, + 'edit_cost': 'CONSTANT', # use CONSTANT cost. + 'greedy_method': 'BASIC', # + # the distance between non-symbolic node/edge labels is computed by euclidean distance. + 'attr_distance': 'euclidean', + 'optimal': True, # if TRUE, the option --greedy-method has no effect + # parallel threads. Does not work if mpg_options['parallel'] = False. + 'threads': multiprocessing.cpu_count(), + 'centrality_method': 'NONE', + 'centrality_weight': 0.7, + 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES' + } + + edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1] +# edit_cost_constants = [item * 0.01 for item in edit_cost_constants] +# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb")) + + options = ged_options.copy() + options['edit_cost_constants'] = edit_cost_constants + options['node_labels'] = [] + options['edge_labels'] = [] + options['node_attrs'] = [] + options['edge_attrs'] = [] + parallel = True # if num_solutions == 1 else False + + """**5. 
Compute GED matrix.**""" + ged_mat = 'error' + runtime = 0 + try: + time0 = time.time() + ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=repeats, parallel=parallel, verbose=True) + runtime = time.time() - time0 + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = save_dir + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception(save_file_suffix) + print(repr(exp)) + + """**6. Get results.**""" + + with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f: + pickle.dump(ged_mat, f) + with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f: + pickle.dump(runtime, f) + + return ged_mat, runtime + + +def save_trials_as_group(graphs, N, repeats, ratio): + # Return if the group file exists. + name_middle = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.' + name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' + if os.path.isfile(name_group): + return + + ged_mats = [] + runtimes = [] + for trial in range(1, 101): + print() + print('Trial:', trial) + ged_mat, runtime = xp_compute_ged_matrix(graphs, N, repeats, ratio, trial) + ged_mats.append(ged_mat) + runtimes.append(runtime) + + # Group trials and Remove single files. + name_prefix = 'ged_matrix' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + name_prefix = 'runtime' + name_middle + group_trials(save_dir, name_prefix, True, True, False) + + +def results_for_a_ratio(ratio): + + for N in N_list: + print() + print('# of graphs:', N) + for repeats in [1, 20, 40, 60, 80, 100]: + print() + print('Repeats:', repeats) + save_trials_as_group(graphs[:N], N, repeats, ratio) + + +if __name__ == '__main__': + if len(sys.argv) > 1: + N_list = [int(i) for i in sys.argv[1:]] + else: + N_list = [10, 50, 100] + + # Generate graphs. 
+ graphs = generate_graphs() + + save_dir = 'outputs/edit_costs.repeats.N.bipartite/' + os.makedirs(save_dir, exist_ok=True) + os.makedirs(save_dir + 'groups/', exist_ok=True) + + for ratio in [10, 1, 0.1]: + print() + print('Ratio:', ratio) + results_for_a_ratio(ratio) diff --git a/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py index e273377..bdb7a30 100644 --- a/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py +++ b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py @@ -19,11 +19,12 @@ from group_results import group_trials def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial): - + save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) - """**1. Get dataset.**""" - dataset = get_dataset(ds_name) + # Return if the file exists. + if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): + return None, None """**2. Set parameters.**""" @@ -78,6 +79,12 @@ def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial): def save_trials_as_group(dataset, ds_name, repeats, ratio): + # Return if the group file exists. + name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.' + name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' + if os.path.isfile(name_group): + return + ged_mats = [] runtimes = [] for trial in range(1, 101): @@ -88,25 +95,35 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio): runtimes.append(runtime) # Group trials and Remove single files. - name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.' 
name_prefix = 'ged_matrix' + name_middle group_trials(save_dir, name_prefix, True, True, False) name_prefix = 'runtime' + name_middle group_trials(save_dir, name_prefix, True, True, False) - - + + def results_for_a_dataset(ds_name): """**1. Get dataset.**""" dataset = get_dataset(ds_name) - for repeats in [1, 20, 40, 60, 80, 100]: + for repeats in repeats_list: print() print('Repeats:', repeats) - for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]: + for ratio in ratio_list: print() print('Ratio:', ratio) save_trials_as_group(dataset, ds_name, repeats, ratio) + + +def get_param_lists(ds_name): + if ds_name == 'AIDS_symb': + repeats_list = [1, 20, 40, 60, 80, 100] + ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] + else: + repeats_list = [1, 20, 40, 60, 80, 100] + ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] + return repeats_list, ratio_list + if __name__ == '__main__': if len(sys.argv) > 1: @@ -121,4 +138,5 @@ if __name__ == '__main__': for ds_name in ds_name_list: print() print('Dataset:', ds_name) + repeats_list, ratio_list = get_param_lists(ds_name) results_for_a_dataset(ds_name) diff --git a/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py index 6b76457..b6863e2 100644 --- a/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py +++ b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py @@ -19,11 +19,12 @@ from group_results import group_trials def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial): - + save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) - - """**1. Get dataset.**""" - dataset = get_dataset(ds_name) + + # Return if the file exists. + if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): + return None, None """**2. 
Set parameters.**""" @@ -83,6 +84,12 @@ def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial): def save_trials_as_group(dataset, ds_name, repeats, ratio): + # Return if the group file exists. + name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.' + name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' + if os.path.isfile(name_group): + return + ged_mats = [] runtimes = [] for trial in range(1, 101): @@ -93,25 +100,35 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio): runtimes.append(runtime) # Group trials and Remove single files. - name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.' name_prefix = 'ged_matrix' + name_middle group_trials(save_dir, name_prefix, True, True, False) name_prefix = 'runtime' + name_middle group_trials(save_dir, name_prefix, True, True, False) - - + + def results_for_a_dataset(ds_name): """**1. Get dataset.**""" dataset = get_dataset(ds_name) - for repeats in [1, 20, 40, 60, 80, 100]: + for repeats in repeats_list: print() print('Repeats:', repeats) - for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]: + for ratio in ratio_list: print() print('Ratio:', ratio) save_trials_as_group(dataset, ds_name, repeats, ratio) + + +def get_param_lists(ds_name): + if ds_name == 'AIDS_symb': + repeats_list = [1, 20, 40, 60, 80, 100] + ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] + else: + repeats_list = [1, 20, 40, 60, 80, 100] + ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] + return repeats_list, ratio_list + if __name__ == '__main__': if len(sys.argv) > 1: @@ -126,4 +143,5 @@ if __name__ == '__main__': for ds_name in ds_name_list: print() print('Dataset:', ds_name) + repeats_list, ratio_list = get_param_lists(ds_name) results_for_a_dataset(ds_name) \ No newline at end of file diff --git a/gklearn/experiments/ged/stability/run_job_edit_costs.N.py b/gklearn/experiments/ged/stability/run_job_edit_costs.N.py 
new file mode 100644 index 0000000..43da338 --- /dev/null +++ b/gklearn/experiments/ged/stability/run_job_edit_costs.N.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Nov 3 20:23:25 2020 + +@author: ljia +""" +import os +import re + + +def get_job_script(arg, params): + ged_method = params[0] + multi_method = params[1] + job_name_label = r"rep." if multi_method == 'repeats' else r"" + script = r""" +#!/bin/bash + +#SBATCH --exclusive +#SBATCH --job-name="st.""" + job_name_label + r"N" + arg + r"." + ged_method + r"""" +#SBATCH --partition=tlong +#SBATCH --mail-type=ALL +#SBATCH --mail-user=jajupmochi@gmail.com +#SBATCH --output="outputs/output_edit_costs.""" + multi_method + r".N." + ged_method + r"." + arg + r""".txt" +#SBATCH --error="errors/error_edit_costs.""" + multi_method + r".N." + ged_method + r"." + arg + r""".txt" +# +#SBATCH --ntasks=1 +#SBATCH --nodes=1 +#SBATCH --cpus-per-task=1 +#SBATCH --time=300:00:00 +#SBATCH --mem-per-cpu=4000 + +srun hostname +srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability +srun python3 edit_costs.""" + multi_method + r".N." + ged_method + r".py " + arg + script = script.strip() + script = re.sub('\n\t+', '\n', script) + script = re.sub('\n +', '\n', script) + + return script + +if __name__ == '__main__': + + params_list = [('IPFP', 'nums_sols'), + ('IPFP', 'repeats'), + ('bipartite', 'max_num_sols'), + ('bipartite', 'repeats')] + N_list = [10, 50, 100] + for params in params_list[1:]: + for N in [N_list[i] for i in [0, 1, 2]]: + job_script = get_job_script(str(N), params) + command = 'sbatch <