From cba80472792d383365c0eb0915903acac62fabdf Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Tue, 2 Feb 2021 17:13:11 +0100 Subject: [PATCH] [Exp] Update computation of ged stability. --- .../edit_costs.real_data.nums_sols.ratios.IPFP.py | 34 ++-- gklearn/experiments/ged/stability/utils.py | 177 +++++++++++++++------ 2 files changed, 150 insertions(+), 61 deletions(-) diff --git a/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py index 33c6973..aa08579 100644 --- a/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py +++ b/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py @@ -13,7 +13,7 @@ import pickle import logging from gklearn.ged.util import compute_geds import time -from utils import get_dataset, set_edit_cost_consts +from utils import get_dataset, set_edit_cost_consts, dichotomous_permutation import sys from group_results import group_trials, check_group_existence, update_group_marker @@ -37,7 +37,7 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial): # the distance between non-symbolic node/edge labels is computed by euclidean distance. 'attr_distance': 'euclidean', 'ratio_runs_from_initial_solutions': 0.25, - # parallel threads. Do not work if mpg_options['parallel'] = False. + # parallel threads. Set to 1 automatically if parallel=True in compute_geds(). 'threads': multiprocessing.cpu_count(), 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES' } @@ -98,7 +98,7 @@ def save_trials_as_group(dataset, ds_name, num_solutions, ratio): ged_mats.append(ged_mat) runtimes.append(runtime) - # Group trials and Remove single files. + # Group trials and remove single files. # @todo: if the program stops between the following lines, then there may be errors. name_prefix = 'ged_matrix' + name_middle group_trials(save_dir, name_prefix, True, True, False, num_trials=num_trials) @@ -111,21 +111,25 @@ def results_for_a_dataset(ds_name): """**1. Get dataset.**""" dataset = get_dataset(ds_name) - for ratio in ratio_list: + for params in list(param_grid): print() - print('Ratio:', ratio) - for num_solutions in num_solutions_list: - print() - print('# of solutions:', num_solutions) - save_trials_as_group(dataset, ds_name, num_solutions, ratio) + print(params) + save_trials_as_group(dataset, ds_name, params['num_solutions'], params['ratio']) -def get_param_lists(ds_name, test=False): - if test: - num_solutions_list = [1, 10, 20, 30, 40, 50] +def get_param_lists(ds_name, mode='test'): + if mode == 'test': + num_solutions_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] ratio_list = [10] return num_solutions_list, ratio_list + elif mode == 'simple': + from sklearn.model_selection import ParameterGrid + param_grid = ParameterGrid([ + {'num_solutions': dichotomous_permutation([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]), 'ratio': [10]}, + {'num_solutions': [10], 'ratio': dichotomous_permutation([0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9, 10])}]) +# print(list(param_grid)) + if ds_name == 'AIDS_symb': num_solutions_list = [1, 20, 40, 60, 80, 100] ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] @@ -133,7 +137,7 @@ def get_param_lists(ds_name, test=False): num_solutions_list = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] # [1, 20, 40, 60, 80, 100] ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9, 10][::-1] - return num_solutions_list, ratio_list + return param_grid if __name__ == '__main__': @@ -141,7 +145,7 @@ if __name__ == '__main__': ds_name_list = sys.argv[1:] else: ds_name_list = ['Acyclic', 'Alkane_unlabeled', 'MAO_lite', 'Monoterpenoides', 'MUTAG'] -# ds_name_list = ['Acyclic'] # 'Alkane_unlabeled'] +# ds_name_list = ['MUTAG'] # 'Alkane_unlabeled'] # ds_name_list = ['Acyclic', 'MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb'] save_dir = 'outputs/edit_costs.real_data.num_sols.ratios.IPFP/' @@ -151,5 +155,5 @@ if __name__ == '__main__': for ds_name in ds_name_list: print() print('Dataset:', ds_name) - num_solutions_list, ratio_list = get_param_lists(ds_name, test=False) + param_grid = get_param_lists(ds_name, mode='simple') results_for_a_dataset(ds_name) diff --git a/gklearn/experiments/ged/stability/utils.py b/gklearn/experiments/ged/stability/utils.py index cbb45b1..e743b27 100644 --- a/gklearn/experiments/ged/stability/utils.py +++ b/gklearn/experiments/ged/stability/utils.py @@ -16,12 +16,12 @@ from gklearn.experiments import DATASET_ROOT def get_dataset(ds_name): # The node/edge labels that will not be used in the computation. -# if ds_name == 'MAO': -# irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} -# if ds_name == 'Monoterpenoides': -# irrelevant_labels = {'edge_labels': ['valence']} -# elif ds_name == 'MUTAG': -# irrelevant_labels = {'edge_labels': ['label_0']} +# if ds_name == 'MAO': +# irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} +# if ds_name == 'Monoterpenoides': +# irrelevant_labels = {'edge_labels': ['valence']} +# elif ds_name == 'MUTAG': +# irrelevant_labels = {'edge_labels': ['label_0']} if ds_name == 'AIDS_symb': irrelevant_labels = {'node_attrs': ['chem', 'charge', 'x', 'y'], 'edge_labels': ['valence']} ds_name = 'AIDS' @@ -49,34 +49,36 @@ def set_edit_cost_consts(ratio, node_labeled=True, edge_labeled=True, mode='unif def nested_keys_exists(element, *keys): - ''' - Check if *keys (nested) exists in `element` (dict). - ''' - if not isinstance(element, dict): - raise AttributeError('keys_exists() expects dict as first argument.') - if len(keys) == 0: - raise AttributeError('keys_exists() expects at least two arguments, one given.') - - _element = element - for key in keys: - try: - _element = _element[key] - except KeyError: - return False - return True - + ''' + Check if *keys (nested) exists in `element` (dict). + ''' + if not isinstance(element, dict): + raise AttributeError('keys_exists() expects dict as first argument.') + if len(keys) == 0: + raise AttributeError('keys_exists() expects at least two arguments, one given.') + + _element = element + for key in keys: + try: + _element = _element[key] + except KeyError: + return False + return True # Check average relative error along elements in two ged matrices. def matrices_ave_relative_error(m1, m2): - error = 0 - base = 0 - for i in range(m1.shape[0]): - for j in range(m1.shape[1]): - error += np.abs(m1[i, j] - m2[i, j]) - base += (np.abs(m1[i, j]) + np.abs(m2[i, j])) / 2 + error = 0 + base = 0 + for i in range(m1.shape[0]): + for j in range(m1.shape[1]): + error += np.abs(m1[i, j] - m2[i, j]) +# base += (np.abs(m1[i, j]) + np.abs(m2[i, j])) + base += (m1[i, j] + m2[i, j]) # Require only 25% of the time of "base += (np.abs(m1[i, j]) + np.abs(m2[i, j]))". - return error / base + base = base / 2 + + return error / base def compute_relative_error(ged_mats): @@ -92,9 +94,9 @@ def compute_relative_error(ged_mats): errors = [] for i, mat in enumerate(ged_mats): err = matrices_ave_relative_error(mat, ged_mat_s) - # if not per_correct: - # print('matrix # ', str(i)) - # pass + # if not per_correct: + # print('matrix # ', str(i)) + # pass errors.append(err) else: errors = [0] @@ -107,11 +109,11 @@ def parse_group_file_name(fn): key1 = splits_all[1] pos2 = splits_all[2].rfind('_') -# key2 = splits_all[2][:pos2] +# key2 = splits_all[2][:pos2] val2 = splits_all[2][pos2+1:] pos3 = splits_all[3].rfind('_') -# key3 = splits_all[3][:pos3] +# key3 = splits_all[3][:pos3] val3 = splits_all[3][pos3+1:] + '.' + splits_all[4] return key1, val2, val3 @@ -232,7 +234,7 @@ def set_axis_style(ax): ax.tick_params(labelsize=8, color='w', pad=1, grid_color='w') ax.tick_params(axis='x', pad=-2) ax.tick_params(axis='y', labelrotation=-40, pad=-2) -# ax.zaxis._axinfo['juggled'] = (1, 2, 0) +# ax.zaxis._axinfo['juggled'] = (1, 2, 0) ax.set_xlabel(ax.get_xlabel(), fontsize=10, labelpad=-3) ax.set_ylabel(ax.get_ylabel(), fontsize=10, labelpad=-2, rotation=50) ax.set_zlabel(ax.get_zlabel(), fontsize=10, labelpad=-2) @@ -240,16 +242,99 @@ def set_axis_style(ax): return +def dichotomous_permutation(arr, layer=0): + import math + +# def seperate_arr(arr, new_arr): +# if (length % 2) == 0: +# half = int(length / 2) +# new_arr += [arr[half - 1], arr[half]] +# subarr1 = [arr[i] for i in range(1, half - 1)] +# else: +# half = math.floor(length / 2) +# new_arr.append(arr[half]) +# subarr1 = [arr[i] for i in range(1, half)] +# subarr2 = [arr[i] for i in range(half + 1, length - 1)] +# subarrs = [subarr1, subarr2] +# return subarrs + + + if layer == 0: + length = len(arr) + if length <= 2: + return arr + + new_arr = [arr[0], arr[-1]] + if (length % 2) == 0: + half = int(length / 2) + new_arr += [arr[half - 1], arr[half]] + subarr1 = [arr[i] for i in range(1, half - 1)] + else: + half = math.floor(length / 2) + new_arr.append(arr[half]) + subarr1 = [arr[i] for i in range(1, half)] + subarr2 = [arr[i] for i in range(half + 1, length - 1)] + subarrs = [subarr1, subarr2] +# subarrs = seperate_arr(arr, new_arr) + new_arr += dichotomous_permutation(subarrs, layer=layer+1) + + else: + new_arr = [] + subarrs = [] + for a in arr: + length = len(a) + if length <= 2: + new_arr += a + else: +# subarrs += seperate_arr(a, new_arr) + if (length % 2) == 0: + half = int(length / 2) + new_arr += [a[half - 1], a[half]] + subarr1 = [a[i] for i in range(0, half - 1)] + else: + half = math.floor(length / 2) + new_arr.append(a[half]) + subarr1 = [a[i] for i in range(0, half)] + subarr2 = [a[i] for i in range(half + 1, length)] + subarrs += [subarr1, subarr2] + + if len(subarrs) > 0: + new_arr += dichotomous_permutation(subarrs, layer=layer+1) + + return new_arr + +# length = len(arr) +# if length <= 2: +# return arr + +# new_arr = [arr[0], arr[-1]] +# if (length % 2) == 0: +# half = int(length / 2) +# new_arr += [arr[half - 1], arr[half]] +# subarr1 = [arr[i] for i in range(1, half - 1)] +# else: +# half = math.floor(length / 2) +# new_arr.append(arr[half]) +# subarr1 = [arr[i] for i in range(1, half)] +# subarr2 = [arr[i] for i in range(half + 1, length - 1)] +# if len(subarr1) > 0: +# new_arr += dichotomous_permutation(subarr1) +# if len(subarr2) > 0: +# new_arr += dichotomous_permutation(subarr2) + +# return new_arr + + if __name__ == '__main__': root_dir = 'outputs/CRIANN/' -# for dir_ in sorted(os.listdir(root_dir)): -# if os.path.isdir(root_dir): -# full_dir = os.path.join(root_dir, dir_) -# print('---', full_dir,':') -# save_dir = os.path.join(full_dir, 'groups/') -# if os.path.exists(save_dir): -# try: -# get_relative_errors(save_dir) -# except Exception as exp: -# print('An exception occured when running this experiment:') -# print(repr(exp)) \ No newline at end of file +# for dir_ in sorted(os.listdir(root_dir)): +# if os.path.isdir(root_dir): +# full_dir = os.path.join(root_dir, dir_) +# print('---', full_dir,':') +# save_dir = os.path.join(full_dir, 'groups/') +# if os.path.exists(save_dir): +# try: +# get_relative_errors(save_dir) +# except Exception as exp: +# print('An exception occured when running this experiment:') +# print(repr(exp)) \ No newline at end of file