From cba80472792d383365c0eb0915903acac62fabdf Mon Sep 17 00:00:00 2001
From: jajupmochi <jajupmochi@gmail.com>
Date: Tue, 2 Feb 2021 17:13:11 +0100
Subject: [PATCH] [Exp] Update computation of ged stability.

---
 .../edit_costs.real_data.nums_sols.ratios.IPFP.py  |  34 ++--
 gklearn/experiments/ged/stability/utils.py         | 177 +++++++++++++++------
 2 files changed, 150 insertions(+), 61 deletions(-)

diff --git a/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py
index 33c6973..aa08579 100644
--- a/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py
+++ b/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py
@@ -13,7 +13,7 @@ import pickle
 import logging
 from gklearn.ged.util import compute_geds
 import time
-from utils import get_dataset, set_edit_cost_consts
+from utils import get_dataset, set_edit_cost_consts, dichotomous_permutation
 import sys
 from group_results import group_trials, check_group_existence, update_group_marker
 
@@ -37,7 +37,7 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):
 				   # the distance between non-symbolic node/edge labels is computed by euclidean distance.
 				   'attr_distance': 'euclidean',
 				   'ratio_runs_from_initial_solutions': 0.25,
-				   # parallel threads. Do not work if mpg_options['parallel'] = False.
+				   # parallel threads. Set to 1 automatically if parallel=True in compute_geds().
 				   'threads': multiprocessing.cpu_count(),
 				   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
 				   }
@@ -98,7 +98,7 @@ def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
 		ged_mats.append(ged_mat)
 		runtimes.append(runtime)
 
-	# Group trials and Remove single files.
+	# Group trials and remove single files.
 	# @todo: if the program stops between the following lines, then there may be errors.
 	name_prefix = 'ged_matrix' + name_middle
 	group_trials(save_dir, name_prefix, True, True, False, num_trials=num_trials)
@@ -111,21 +111,25 @@ def results_for_a_dataset(ds_name):
 	"""**1.   Get dataset.**"""
 	dataset = get_dataset(ds_name)
 
-	for ratio in ratio_list:
+	for params in list(param_grid):
 		print()
-		print('Ratio:', ratio)
-		for num_solutions in num_solutions_list:
-			print()
-			print('# of solutions:', num_solutions)
-			save_trials_as_group(dataset, ds_name, num_solutions, ratio)
+		print(params)
+		save_trials_as_group(dataset, ds_name, params['num_solutions'], params['ratio'])
 
 
-def get_param_lists(ds_name, test=False):
-	if test:
-		num_solutions_list = [1, 10, 20, 30, 40, 50]
+def get_param_lists(ds_name, mode='test'):
+	if mode == 'test':
+		num_solutions_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
 		ratio_list = [10]
 		return num_solutions_list, ratio_list
 
+	elif mode == 'simple':
+		from sklearn.model_selection import ParameterGrid
+		param_grid = ParameterGrid([
+			{'num_solutions': dichotomous_permutation([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]), 'ratio': [10]},
+			{'num_solutions': [10], 'ratio': dichotomous_permutation([0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9, 10])}])
+# 		print(list(param_grid))
+
 	if ds_name == 'AIDS_symb':
 		num_solutions_list = [1, 20, 40, 60, 80, 100]
 		ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
@@ -133,7 +137,7 @@ def get_param_lists(ds_name, test=False):
 		num_solutions_list = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] # [1, 20, 40, 60, 80, 100]
 		ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9, 10][::-1]
 
-	return num_solutions_list, ratio_list
+	return param_grid
 
 
 if __name__ == '__main__':
@@ -141,7 +145,7 @@ if __name__ == '__main__':
 		ds_name_list = sys.argv[1:]
 	else:
 		ds_name_list = ['Acyclic', 'Alkane_unlabeled', 'MAO_lite', 'Monoterpenoides', 'MUTAG']
-# 		ds_name_list = ['Acyclic'] # 'Alkane_unlabeled']
+# 		ds_name_list = ['MUTAG'] # 'Alkane_unlabeled']
 # 		ds_name_list = ['Acyclic', 'MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
 
 	save_dir = 'outputs/edit_costs.real_data.num_sols.ratios.IPFP/'
@@ -151,5 +155,5 @@ if __name__ == '__main__':
 	for ds_name in ds_name_list:
 		print()
 		print('Dataset:', ds_name)
-		num_solutions_list, ratio_list = get_param_lists(ds_name, test=False)
+		param_grid = get_param_lists(ds_name, mode='simple')
 		results_for_a_dataset(ds_name)
diff --git a/gklearn/experiments/ged/stability/utils.py b/gklearn/experiments/ged/stability/utils.py
index cbb45b1..e743b27 100644
--- a/gklearn/experiments/ged/stability/utils.py
+++ b/gklearn/experiments/ged/stability/utils.py
@@ -16,12 +16,12 @@ from gklearn.experiments import DATASET_ROOT
 
 def get_dataset(ds_name):
 	# The node/edge labels that will not be used in the computation.
-# 	if ds_name == 'MAO':
-# 		irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
-# 	if ds_name == 'Monoterpenoides':
-# 		irrelevant_labels = {'edge_labels': ['valence']}
-# 	elif ds_name == 'MUTAG':
-# 		irrelevant_labels = {'edge_labels': ['label_0']}
+#	if ds_name == 'MAO':
+#		irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
+#	if ds_name == 'Monoterpenoides':
+#		irrelevant_labels = {'edge_labels': ['valence']}
+#	elif ds_name == 'MUTAG':
+#		irrelevant_labels = {'edge_labels': ['label_0']}
 	if ds_name == 'AIDS_symb':
 		irrelevant_labels = {'node_attrs': ['chem', 'charge', 'x', 'y'], 'edge_labels': ['valence']}
 		ds_name = 'AIDS'
@@ -49,34 +49,36 @@ def set_edit_cost_consts(ratio, node_labeled=True, edge_labeled=True, mode='unif
 
 
 def nested_keys_exists(element, *keys):
-    '''
-    Check if *keys (nested) exists in `element` (dict).
-    '''
-    if not isinstance(element, dict):
-        raise AttributeError('keys_exists() expects dict as first argument.')
-    if len(keys) == 0:
-        raise AttributeError('keys_exists() expects at least two arguments, one given.')
-
-    _element = element
-    for key in keys:
-        try:
-            _element = _element[key]
-        except KeyError:
-            return False
-    return True
-
+	"""Check whether the nested key path `keys` exists in the dict `element`.
+
+	Returns False if a key is missing or an intermediate value cannot be
+	indexed; raises AttributeError for a non-dict `element` or an empty path.
+	"""
+	if not isinstance(element, dict):
+		raise AttributeError('keys_exists() expects dict as first argument.')
+	if len(keys) == 0:
+		raise AttributeError('keys_exists() expects at least two arguments, one given.')
+	try:
+		for key in keys:
+			element = element[key]
+	except (KeyError, IndexError, TypeError):
+		return False
+	return True
 
 
 # Check average relative error along elements in two ged matrices.
 def matrices_ave_relative_error(m1, m2):
-    error = 0
-    base = 0
-    for i in range(m1.shape[0]):
-        for j in range(m1.shape[1]):
-            error += np.abs(m1[i, j] - m2[i, j])
-            base += (np.abs(m1[i, j]) + np.abs(m2[i, j])) / 2
+	"""Return the average relative error between two GED matrices.
+
+	Computed as sum(|m1 - m2|) / (sum(m1 + m2) / 2) over all entries. Entries
+	are assumed non-negative (no absolute values in the denominator), and both
+	matrices must have the same shape. Returns 0 if the denominator is 0.
+	"""
+	m1, m2 = np.asarray(m1), np.asarray(m2)
+	error = np.abs(m1 - m2).sum()
+	base = (m1 + m2).sum() / 2
 
-    return error / base
+	return error / base if base != 0 else 0
 
 
 def compute_relative_error(ged_mats):
@@ -92,9 +94,9 @@ def compute_relative_error(ged_mats):
 		errors = []
 		for i, mat in enumerate(ged_mats):
 			err = matrices_ave_relative_error(mat, ged_mat_s)
-	    #             if not per_correct:
-	    #                 print('matrix # ', str(i))
-	    #                 pass
+		#			 if not per_correct:
+		#				 print('matrix # ', str(i))
+		#				 pass
 			errors.append(err)
 	else:
 		errors = [0]
@@ -107,11 +109,11 @@ def parse_group_file_name(fn):
 	key1 = splits_all[1]
 
 	pos2 = splits_all[2].rfind('_')
-# 	key2 = splits_all[2][:pos2]
+#	key2 = splits_all[2][:pos2]
 	val2 = splits_all[2][pos2+1:]
 
 	pos3 = splits_all[3].rfind('_')
-# 	key3 = splits_all[3][:pos3]
+#	key3 = splits_all[3][:pos3]
 	val3 = splits_all[3][pos3+1:] + '.' + splits_all[4]
 
 	return key1, val2, val3
@@ -232,7 +234,7 @@ def set_axis_style(ax):
 	ax.tick_params(labelsize=8, color='w', pad=1, grid_color='w')
 	ax.tick_params(axis='x', pad=-2)
 	ax.tick_params(axis='y', labelrotation=-40, pad=-2)
-# 	ax.zaxis._axinfo['juggled'] = (1, 2, 0)
+#	ax.zaxis._axinfo['juggled'] = (1, 2, 0)
 	ax.set_xlabel(ax.get_xlabel(), fontsize=10, labelpad=-3)
 	ax.set_ylabel(ax.get_ylabel(), fontsize=10, labelpad=-2, rotation=50)
 	ax.set_zlabel(ax.get_zlabel(), fontsize=10, labelpad=-2)
@@ -240,16 +242,99 @@ def set_axis_style(ax):
 	return
 
 
+def dichotomous_permutation(arr, layer=0):
+	"""Reorder a sequence so that its coarsest samples come first.
+
+	The result starts with the two end points of ``arr``, followed by its
+	middle element(s), followed by the middle element(s) of every remaining
+	gap, one layer of gaps at a time (breadth first). Every element of the
+	input appears exactly once. A parameter grid built from the result thus
+	visits values spread over the whole range first and only then refines the
+	intervals in between.
+
+	Parameters
+	----------
+	arr : sequence
+		If ``layer == 0``, a flat sequence of values (e.g. a list). Otherwise an
+		iterable of such sequences, each one a gap that is still to be split.
+	layer : int, optional
+		Only ``0`` versus non-zero matters. Callers normally keep the default;
+		non-zero values are accepted for backward compatibility with the former
+		recursive implementation. The default is 0.
+
+	Returns
+	-------
+	list
+		A new list with the reordered elements. The input is never modified and
+		never returned itself.
+
+	Notes
+	-----
+	A gap of even length contributes its two central elements, a gap of odd
+	length its single central element. Gaps of at most two elements are
+	emitted unchanged.
+
+	Examples
+	--------
+	>>> dichotomous_permutation([0, 1, 2, 3, 4, 5, 6, 7, 8])
+	[0, 8, 4, 2, 6, 1, 3, 5, 7]
+	>>> dichotomous_permutation([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
+	[0, 10, 5, 2, 3, 7, 8, 1, 4, 6, 9]
+	>>> dichotomous_permutation([1, 2, 3, 4])
+	[1, 4, 2, 3]
+	>>> dichotomous_permutation([1, 2])
+	[1, 2]
+	"""
+
+	def split_gap(gap):
+		"""Return the central element(s) of `gap` and what lies on each side."""
+		half = len(gap) // 2
+		# An even-length gap has two central elements, an odd-length one has one.
+		first_mid = half - 1 if len(gap) % 2 == 0 else half
+		return gap[first_mid:half + 1], gap[:first_mid], gap[half + 1:]
+
+	if layer == 0:
+		values = list(arr)
+		if len(values) <= 2:
+			# Nothing to reorder. Return the copy so that the caller never gets an
+			# alias of its own input.
+			return values
+
+		# The two end points always come first; everything strictly between them
+		# forms the first gap to split.
+		new_arr = [values[0], values[-1]]
+		gaps = [values[1:-1]]
+	else:
+		# Backward-compatible entry point: `arr` already is a collection of gaps.
+		new_arr = []
+		gaps = [list(gap) for gap in arr]
+
+	# Breadth-first subdivision: finish every gap of the current layer before
+	# descending, so that elements are emitted from coarse to fine.
+	while gaps:
+		next_gaps = []
+		for gap in gaps:
+			if len(gap) <= 2:
+				new_arr += gap
+			else:
+				middle, left, right = split_gap(gap)
+				new_arr += middle
+				next_gaps += [left, right]
+		gaps = next_gaps
+
+	return new_arr
+
+
 if __name__ == '__main__':
 	root_dir = 'outputs/CRIANN/'
-# 	for dir_ in sorted(os.listdir(root_dir)):
-# 		if os.path.isdir(root_dir):
-# 			full_dir = os.path.join(root_dir, dir_)
-# 			print('---', full_dir,':')
-# 			save_dir = os.path.join(full_dir, 'groups/')
-# 			if os.path.exists(save_dir):
-# 				try:
-# 					get_relative_errors(save_dir)
-# 				except Exception as exp:
-# 					print('An exception occured when running this experiment:')
-# 					print(repr(exp))
\ No newline at end of file
+#	for dir_ in sorted(os.listdir(root_dir)):
+#		if os.path.isdir(root_dir):
+#			full_dir = os.path.join(root_dir, dir_)
+#			print('---', full_dir,':')
+#			save_dir = os.path.join(full_dir, 'groups/')
+#			if os.path.exists(save_dir):
+#				try:
+#					get_relative_errors(save_dir)
+#				except Exception as exp:
+#					print('An exception occured when running this experiment:')
+#					print(repr(exp))
\ No newline at end of file