Browse Source

[Exp] Update computation of ged stability.

v0.2.x
jajupmochi 4 years ago
parent
commit
cba8047279
2 changed files with 150 additions and 61 deletions
  1. +19
    -15
      gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py
  2. +131
    -46
      gklearn/experiments/ged/stability/utils.py

+ 19
- 15
gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py View File

@@ -13,7 +13,7 @@ import pickle
import logging
from gklearn.ged.util import compute_geds
import time
from utils import get_dataset, set_edit_cost_consts
from utils import get_dataset, set_edit_cost_consts, dichotomous_permutation
import sys
from group_results import group_trials, check_group_existence, update_group_marker

@@ -37,7 +37,7 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):
# the distance between non-symbolic node/edge labels is computed by euclidean distance.
'attr_distance': 'euclidean',
'ratio_runs_from_initial_solutions': 0.25,
# parallel threads. Do not work if mpg_options['parallel'] = False.
# parallel threads. Set to 1 automatically if parallel=True in compute_geds().
'threads': multiprocessing.cpu_count(),
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
@@ -98,7 +98,7 @@ def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
ged_mats.append(ged_mat)
runtimes.append(runtime)

# Group trials and Remove single files.
# Group trials and remove single files.
# @todo: if the program stops between the following lines, then there may be errors.
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False, num_trials=num_trials)
@@ -111,21 +111,25 @@ def results_for_a_dataset(ds_name):
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)

for ratio in ratio_list:
for params in list(param_grid):
print()
print('Ratio:', ratio)
for num_solutions in num_solutions_list:
print()
print('# of solutions:', num_solutions)
save_trials_as_group(dataset, ds_name, num_solutions, ratio)
print(params)
save_trials_as_group(dataset, ds_name, params['num_solutions'], params['ratio'])


def get_param_lists(ds_name, test=False):
if test:
num_solutions_list = [1, 10, 20, 30, 40, 50]
def get_param_lists(ds_name, mode='test'):
if mode == 'test':
num_solutions_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
ratio_list = [10]
return num_solutions_list, ratio_list

elif mode == 'simple':
from sklearn.model_selection import ParameterGrid
param_grid = ParameterGrid([
{'num_solutions': dichotomous_permutation([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]), 'ratio': [10]},
{'num_solutions': [10], 'ratio': dichotomous_permutation([0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9, 10])}])
# print(list(param_grid))

if ds_name == 'AIDS_symb':
num_solutions_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
@@ -133,7 +137,7 @@ def get_param_lists(ds_name, test=False):
num_solutions_list = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] # [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9, 10][::-1]

return num_solutions_list, ratio_list
return param_grid


if __name__ == '__main__':
@@ -141,7 +145,7 @@ if __name__ == '__main__':
ds_name_list = sys.argv[1:]
else:
ds_name_list = ['Acyclic', 'Alkane_unlabeled', 'MAO_lite', 'Monoterpenoides', 'MUTAG']
# ds_name_list = ['Acyclic'] # 'Alkane_unlabeled']
# ds_name_list = ['MUTAG'] # 'Alkane_unlabeled']
# ds_name_list = ['Acyclic', 'MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']

save_dir = 'outputs/edit_costs.real_data.num_sols.ratios.IPFP/'
@@ -151,5 +155,5 @@ if __name__ == '__main__':
for ds_name in ds_name_list:
print()
print('Dataset:', ds_name)
num_solutions_list, ratio_list = get_param_lists(ds_name, test=False)
param_grid = get_param_lists(ds_name, mode='simple')
results_for_a_dataset(ds_name)

+ 131
- 46
gklearn/experiments/ged/stability/utils.py View File

@@ -16,12 +16,12 @@ from gklearn.experiments import DATASET_ROOT

def get_dataset(ds_name):
# The node/edge labels that will not be used in the computation.
# if ds_name == 'MAO':
# irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
# if ds_name == 'Monoterpenoides':
# irrelevant_labels = {'edge_labels': ['valence']}
# elif ds_name == 'MUTAG':
# irrelevant_labels = {'edge_labels': ['label_0']}
# if ds_name == 'MAO':
# irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
# if ds_name == 'Monoterpenoides':
# irrelevant_labels = {'edge_labels': ['valence']}
# elif ds_name == 'MUTAG':
# irrelevant_labels = {'edge_labels': ['label_0']}
if ds_name == 'AIDS_symb':
irrelevant_labels = {'node_attrs': ['chem', 'charge', 'x', 'y'], 'edge_labels': ['valence']}
ds_name = 'AIDS'
@@ -49,34 +49,36 @@ def set_edit_cost_consts(ratio, node_labeled=True, edge_labeled=True, mode='unif


def nested_keys_exists(element, *keys):
    """
    Check if *keys (nested) exists in `element` (dict).

    The keys are followed one after another, i.e. the function tests whether
    ``element[keys[0]][keys[1]]...`` can be evaluated.

    Parameters
    ----------
    element : dict
        The (possibly nested) dictionary to look into.
    *keys
        One or more keys, ordered from the outermost to the innermost level.

    Returns
    -------
    bool
        True if the whole chain of keys can be followed, False otherwise.

    Raises
    ------
    AttributeError
        If `element` is not a dict or if no key is given.
    """
    if not isinstance(element, dict):
        raise AttributeError('nested_keys_exists() expects dict as first argument.')
    if len(keys) == 0:
        raise AttributeError('nested_keys_exists() expects at least two arguments, one given.')

    _element = element
    for key in keys:
        try:
            _element = _element[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: the key is missing at this level.
            # IndexError / TypeError: an intermediate value is a sequence or
            # a non-subscriptable object (e.g. a number), so the remaining
            # keys cannot exist below it.
            return False
    return True


# Check average relative error along elements in two ged matrices.
def matrices_ave_relative_error(m1, m2):
    """
    Compute the average relative error between two ged matrices.

    The result is the sum of the element-wise absolute differences divided
    by the sum of the element-wise mean magnitudes, i.e.
    ``sum(|m1 - m2|) / (sum(|m1| + |m2|) / 2)``.

    Parameters
    ----------
    m1, m2 : array-like
        Two matrices of the same shape.

    Returns
    -------
    float
        The relative error; 0.0 if both matrices contain only zeros.
    """
    m1 = np.asarray(m1)
    m2 = np.asarray(m2)

    # Vectorized sums replace the element-wise Python double loop.
    error = np.abs(m1 - m2).sum()
    # Absolute values keep the denominator meaningful even if an entry is
    # negative; for non-negative matrices this equals sum(m1 + m2) / 2.
    base = (np.abs(m1) + np.abs(m2)).sum() / 2

    if base == 0:
        # Every entry of both matrices is zero, hence the matrices are equal
        # and the error is zero as well; avoid computing 0 / 0.
        return 0.0

    return error / base


def compute_relative_error(ged_mats):
@@ -92,9 +94,9 @@ def compute_relative_error(ged_mats):
errors = []
for i, mat in enumerate(ged_mats):
err = matrices_ave_relative_error(mat, ged_mat_s)
# if not per_correct:
# print('matrix # ', str(i))
# pass
# if not per_correct:
# print('matrix # ', str(i))
# pass
errors.append(err)
else:
errors = [0]
@@ -107,11 +109,11 @@ def parse_group_file_name(fn):
key1 = splits_all[1]

pos2 = splits_all[2].rfind('_')
# key2 = splits_all[2][:pos2]
# key2 = splits_all[2][:pos2]
val2 = splits_all[2][pos2+1:]

pos3 = splits_all[3].rfind('_')
# key3 = splits_all[3][:pos3]
# key3 = splits_all[3][:pos3]
val3 = splits_all[3][pos3+1:] + '.' + splits_all[4]

return key1, val2, val3
@@ -232,7 +234,7 @@ def set_axis_style(ax):
ax.tick_params(labelsize=8, color='w', pad=1, grid_color='w')
ax.tick_params(axis='x', pad=-2)
ax.tick_params(axis='y', labelrotation=-40, pad=-2)
# ax.zaxis._axinfo['juggled'] = (1, 2, 0)
# ax.zaxis._axinfo['juggled'] = (1, 2, 0)
ax.set_xlabel(ax.get_xlabel(), fontsize=10, labelpad=-3)
ax.set_ylabel(ax.get_ylabel(), fontsize=10, labelpad=-2, rotation=50)
ax.set_zlabel(ax.get_zlabel(), fontsize=10, labelpad=-2)
@@ -240,16 +242,99 @@ def set_axis_style(ax):
return


def _split_around_middle(segment):
    """Return (middle element(s), left remainder, right remainder) of `segment`."""
    length = len(segment)
    half = length // 2
    if length % 2 == 0:
        # Even length: the two central elements form the middle.
        return [segment[half - 1], segment[half]], segment[:half - 1], segment[half + 1:]
    return [segment[half]], segment[:half], segment[half + 1:]


def dichotomous_permutation(arr, layer=0):
    """
    Reorder `arr` dichotomously.

    The first and last elements come first, followed by the middle
    element(s) of the array, then the middle element(s) of each remaining
    part, and so on, level by level (breadth-first). Parts with at most two
    elements are emitted as they are.

    Parameters
    ----------
    arr : sequence
        If `layer` is 0, the sequence to permute. Otherwise a sequence of
        sub-sequences (the parts still to be processed at this level).
    layer : int, optional
        Depth of the dichotomy. Callers normally leave it at 0; any other
        value makes `arr` be interpreted as a list of parts.

    Returns
    -------
    list
        A new list holding the permuted elements. The input is never
        returned by reference, so modifying the result does not affect it.
    """
    if layer == 0:
        items = list(arr)
        if len(items) <= 2:
            return items
        new_arr = [items[0], items[-1]]
        # Splitting the interior around its middle yields exactly the middle
        # element(s) of the whole array and the two parts between them and
        # the extremes.
        segments = [items[1:-1]]
    else:
        new_arr = []
        segments = [list(a) for a in arr]

    # Process one level of parts per iteration instead of recursing.
    while segments:
        next_segments = []
        for segment in segments:
            if len(segment) <= 2:
                new_arr += segment
            else:
                middle, left, right = _split_around_middle(segment)
                new_arr += middle
                next_segments += [left, right]
        segments = next_segments

    return new_arr

# length = len(arr)
# if length <= 2:
# return arr

# new_arr = [arr[0], arr[-1]]
# if (length % 2) == 0:
# half = int(length / 2)
# new_arr += [arr[half - 1], arr[half]]
# subarr1 = [arr[i] for i in range(1, half - 1)]
# else:
# half = math.floor(length / 2)
# new_arr.append(arr[half])
# subarr1 = [arr[i] for i in range(1, half)]
# subarr2 = [arr[i] for i in range(half + 1, length - 1)]
# if len(subarr1) > 0:
# new_arr += dichotomous_permutation(subarr1)
# if len(subarr2) > 0:
# new_arr += dichotomous_permutation(subarr2)

# return new_arr


if __name__ == '__main__':
    # Directory used by the (currently disabled) batch driver below.
    root_dir = 'outputs/CRIANN/'
    # The driver is commented out, so running this module as a script only
    # assigns `root_dir`. It is kept for reference: it would walk the entries
    # of `root_dir` and call get_relative_errors() on each `groups/`
    # sub-directory that exists.
    # NOTE(review): the isdir() test below checks `root_dir`, not the joined
    # per-entry path — confirm whether `full_dir` was intended.
    # for dir_ in sorted(os.listdir(root_dir)):
    #     if os.path.isdir(root_dir):
    #         full_dir = os.path.join(root_dir, dir_)
    #         print('---', full_dir,':')
    #         save_dir = os.path.join(full_dir, 'groups/')
    #         if os.path.exists(save_dir):
    #             try:
    #                 get_relative_errors(save_dir)
    #             except Exception as exp:
    #                 print('An exception occured when running this experiment:')
    #                 print(repr(exp))

Loading…
Cancel
Save