From d24cdec251d81dc6de0ea745c7b22a878a8e2b76 Mon Sep 17 00:00:00 2001
From: jajupmochi <jajupmochi@gmail.com>
Date: Mon, 2 Nov 2020 16:58:27 +0100
Subject: [PATCH] Update exps: ged stability.

---
 ...is_stability.ratios.real_data.relative_error.py | 312 +++++++++++++++++++++
 .../edit_costs.max_num_sols.ratios.bipartite.py    | 130 +++++++++
 .../stability/edit_costs.nums_sols.ratios.IPFP.py  | 101 ++++---
 .../stability/edit_costs.repeats.ratios.IPFP.py    | 125 +++++++++
 .../edit_costs.repeats.ratios.bipartite.py         | 130 +++++++++
 gklearn/experiments/ged/stability/group_results.py | 108 +++++++
 gklearn/experiments/ged/stability/utils.py         |  30 ++
 7 files changed, 893 insertions(+), 43 deletions(-)
 create mode 100644 gklearn/experiments/ged/stability/Analysis_stability.ratios.real_data.relative_error.py
 create mode 100644 gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py
 create mode 100644 gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py
 create mode 100644 gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py
 create mode 100644 gklearn/experiments/ged/stability/group_results.py
 create mode 100644 gklearn/experiments/ged/stability/utils.py

diff --git a/gklearn/experiments/ged/stability/Analysis_stability.ratios.real_data.relative_error.py b/gklearn/experiments/ged/stability/Analysis_stability.ratios.real_data.relative_error.py
new file mode 100644
index 0000000..a618626
--- /dev/null
+++ b/gklearn/experiments/ged/stability/Analysis_stability.ratios.real_data.relative_error.py
@@ -0,0 +1,312 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Nov  6 15:35:32 2018
+
+@author: ljia
+"""
+
+#import numpy as np
+import matplotlib.pyplot as plt
+import numpy as np
+import matplotlib.gridspec as gridspec
+# import pickle
+import os
+import sys
+from tqdm import tqdm
+# from mpl_toolkits.mplot3d import Axes3D
+
+
+root_dir = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/gklearn/experiments/ged/stability/outputs/'
+
+root_dir_criann = '/media/ljia/DATA/research-repo/codes/Linlin/graphkit-learn/gklearn/experiments/ged/stability/outputs/CRIANN/'
+
+Dataset_List = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
+
+Legend_Labels = ['common walk', 'marginalized', 'Sylvester equation', 'conjugate gradient', 'fixed-point iterations', 'Spectral decomposition', 'shortest path', 'structural sp', 'path up to length $h$', 'treelet', 'WL subtree']
+
+# Colors = ['#084594', '#2171b5', '#4292c6', '#6baed6', '#9ecae1', '#c6dbef', 
+#               '#54278f', '#756bb1', '#9e9ac8', '#de2d26', '#fc9272']
+Colors=[
+    '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a',
+    '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94',
+    '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d',
+    '#17becf', '#9edae5']
+
+SMALL_SIZE = 8
+MEDIUM_SIZE = 10
+BIGGER_SIZE = 12
+
+
+def read_trials_group(save_dir, ds_name, num_sols, ratio, label):
+	"""Load the grouped GED matrices of one setting; return [] when the group file does not exist."""
+	setting = '.'.join([ds_name, label + '_' + str(num_sols), 'ratio_' + "{:.2f}".format(ratio)])
+	group_file = save_dir + 'groups/ged_mats.' + setting + '.npy'
+	if not os.path.isfile(group_file):
+		return []
+	with open(group_file, 'rb') as f:
+		return np.load(f)
+			
+#	ged_mats = []
+#	for trial in range(1, 101):
+#		file_name = file_prefix + '.trial_' + str(trial) + '.pkl'
+#		if os.path.isfile(file_name):
+#			ged_matrix = pickle.load(open(file_name, 'rb'))
+#			ged_mats.append(ged_matrix)
+#		else:
+# #			print(trial)
+#			pass	
+		
+		
+# Check average relative error along elements in two ged matrices.
+def matrices_ave_relative_error(m1, m2):
+    """Return sum(|m1 - m2|) divided by the sum of the element-wise mean magnitudes.
+
+    m1 and m2 are numpy matrices of the same shape. The sums are vectorised
+    instead of looping over every element. NaN results when the base is 0.
+    """
+    error = np.sum(np.abs(m1 - m2))
+    base = np.sum((np.abs(m1) + np.abs(m2)) / 2)
+    return error / base
+
+
+def compute_relative_error(ged_mats):
+	"""Return the mean relative error of the GED matrices w.r.t. their element-wise minimum.
+
+	The element-wise minimum over all matrices is taken as the reference
+	("correct") GED matrix and every matrix is compared against it with
+	matrices_ave_relative_error().
+
+	ged_mats: sequence (or stacked array) of equally shaped GED matrices, one
+	per trial; it may be empty.
+
+	Returns the mean of the per-matrix errors, or 0 when ged_mats is empty.
+	"""
+	if len(ged_mats) == 0:
+		# No result was found for this setting; report a zero error.
+		return np.mean([0])
+
+	# Smallest value found for every matrix entry across all trials.
+	ged_mat_s = np.min(ged_mats, axis=0)
+
+	errors = [matrices_ave_relative_error(mat, ged_mat_s) for mat in ged_mats]
+	return np.mean(errors)
+		
+			 
+
+
+# Use one font size (15) for all titles, labels, ticks and the legend.
+plt.rc('axes', titlesize=15)     # fontsize of the axes title
+plt.rc('axes', labelsize=15)    # fontsize of the x and y labels
+plt.rc('xtick', labelsize=15)    # fontsize of the tick labels
+plt.rc('ytick', labelsize=15)    # fontsize of the tick labels
+plt.rc('legend', fontsize=15)    # legend fontsize
+plt.rc('figure', titlesize=15)  # fontsize of the figure title
+# 2 x 2 grid of 3D panels; "ax" is an extra 2D axes (subplot 111) reserved for common labels.
+#fig, _ = plt.subplots(2, 2, figsize=(13, 12))
+#ax1 = plt.subplot(221)
+#ax2 = plt.subplot(222)
+#ax3 = plt.subplot(223)
+#ax4 = plt.subplot(224)
+gs = gridspec.GridSpec(2, 2)
+gs.update(hspace=0.3)
+fig = plt.figure(figsize=(11, 12))
+ax = fig.add_subplot(111)    # The big subplot for common labels
+ax1 = fig.add_subplot(gs[0, 0], projection='3d')
+ax2 = fig.add_subplot(gs[0, 1], projection='3d')
+ax3 = fig.add_subplot(gs[1, 0], projection='3d')
+ax4 = fig.add_subplot(gs[1, 1], projection='3d')
+# ax5 = fig.add_subplot(gs[2, 0])
+# ax6 = fig.add_subplot(gs[2, 1])
+
+# Hide the axis lines and ticks of the big subplot so that only the panels
+# drawn on top of it remain visible.
+for side in ('top', 'bottom', 'left', 'right'):
+	ax.spines[side].set_color('none')
+# tick_params takes booleans for top/bottom/left/right; the string 'off' is a
+# truthy value and therefore does not reliably switch the ticks off.
+ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
+ax.xaxis.set_ticks_position('none')
+ax.yaxis.set_ticks_position('none')
+# Position reserved for a common y label (no label text is set at the moment).
+ax.yaxis.set_label_coords(-0.105, 0.5)
+# ax.set_ylabel('runtime($s$)')
+
+
+# -------------- num_sols, IPFP --------------
+def get_num_sol_results():
+	"""Return {dataset name: errors[num_sols index][ratio index]} for the 'num_sols' IPFP results."""
+	save_dir = root_dir_criann + 'edit_costs.num_sols.ratios.IPFP/'
+	nums_sols = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
+	ratios = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+	print('-------- num_sols, IPFP --------')
+	errors = {}
+	for ds_name in Dataset_List:
+		print(ds_name)
+		errors[ds_name] = []
+		for num_sols in nums_sols:
+			progress = tqdm(ratios, desc='num_sols = ' + str(num_sols), file=sys.stdout)
+			# One row per number of solutions, one entry per ratio.
+			errors[ds_name].append([compute_relative_error(read_trials_group(save_dir, ds_name, num_sols, ratio, 'num_sols')) for ratio in progress])
+	return errors
+	
+x_values = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
+y_values = range(0, 19)  # indices into the list of 19 ratios, not the ratios themselves
+X, Y = np.meshgrid(x_values, y_values)
+errors = get_num_sol_results()
+for i, ds_name in enumerate(Dataset_List):
+	if ds_name in errors:
+		z_values = np.array(errors[ds_name])
+		ax1.plot_wireframe(X, Y, z_values.T, label=Dataset_List[i], color=Colors[i]) # transposed: rows follow Y (ratios), columns follow X
+
+# NOTE(review): the z label says "(%)" but the errors are plotted unscaled - confirm.
+# Ticks are placed at every second ratio index and labelled with the ratio at that index.
+ax1.set_xlabel('# of solutions')
+ax1.set_ylabel('ratios')
+ax1.set_zlabel('average relative errors (%)')
+ax1.set_title('(a) num_sols, IPFP')
+ax1.set_yticks(range(0, 19, 2))
+ax1.set_yticklabels([0.1, 0.3, 0.5, 0.7, 0.9, 2, 4, 6, 8, 10])
+# ax1.set_axisbelow(True)
+# ax1.spines['top'].set_visible(False)
+# ax1.spines['bottom'].set_visible(False)
+# ax1.spines['right'].set_visible(False)
+# ax1.spines['left'].set_visible(False)
+# ax1.xaxis.set_ticks_position('none')
+# ax1.yaxis.set_ticks_position('none')
+# ax1.set_ylim(bottom=-1000)
+handles, labels = ax1.get_legend_handles_labels()  # reassigned from ax2 further down
+
+
+
+# # -------------- repeats, IPFP --------------
+def get_repeats_results():
+	"""Return {dataset name: errors[repeats index][ratio index]} for the 'repeats' IPFP results."""
+	save_dir = root_dir_criann + 'edit_costs.repeats.ratios.IPFP/'
+	nums_repeats = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
+	ratios = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+	print('-------- repeats, IPFP --------')
+	errors = {}
+	for ds_name in Dataset_List:
+		print(ds_name)
+		errors[ds_name] = []
+		for num_sols in nums_repeats:
+			progress = tqdm(ratios, desc='num_sols = ' + str(num_sols), file=sys.stdout)
+			# One row per number of repeats, one entry per ratio.
+			errors[ds_name].append([compute_relative_error(read_trials_group(save_dir, ds_name, num_sols, ratio, 'repeats')) for ratio in progress])
+	return errors
+	
+x_values = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
+y_values = range(0, 19)  # indices into the list of 19 ratios, not the ratios themselves
+X, Y = np.meshgrid(x_values, y_values)
+errors = get_repeats_results()
+for i, ds_name in enumerate(Dataset_List):
+	if ds_name in errors:
+		z_values = np.array(errors[ds_name])
+		# Transposed so that the rows follow Y (ratios) and the columns follow X.
+		ax2.plot_wireframe(X, Y, z_values.T, label=Dataset_List[i], color=Colors[i])
+
+# The x axis of this panel is the number of repeats (see the panel title and
+# the 'repeats' label of the result files), not the number of solutions.
+ax2.set_xlabel('# of repeats')
+ax2.set_ylabel('ratios')
+# NOTE(review): the label says "(%)" but the errors are plotted unscaled - confirm.
+ax2.set_zlabel('average relative errors (%)')
+ax2.set_title('(b) repeats, IPFP')
+# One tick for every second ratio index, labelled with the ratio at that index.
+ax2.set_yticks(range(0, 19, 2))
+ax2.set_yticklabels([0.1, 0.3, 0.5, 0.7, 0.9, 2, 4, 6, 8, 10])
+# ax2.set_axisbelow(True)
+# ax2.spines['top'].set_visible(False)
+# ax2.xaxis.set_ticks_position('none')
+# ax2.set_ylim(bottom=-1000)
+# The labels collected here are the ones passed to fig.legend() below.
+handles, labels = ax2.get_legend_handles_labels()
+
+
+# # -------------- degrees --------------
+# def get_degree_results():
+#	save_dir = root_dir_criann + '28 cores/synthesized_graphs_degrees/'
+#	run_times = {}
+#	for kernel_name in Graph_Kernel_List:
+#		run_times[kernel_name] = []
+#		for num in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
+#			file_name = save_dir + 'run_time.' + kernel_name + '.' + str(num) + '.pkl'
+#			if os.path.isfile(file_name):
+#				run_time = pickle.load(open(file_name, 'rb'))
+#			else:
+#				run_time = 0
+#			run_times[kernel_name].append(run_time)
+#	return run_times
+
+# x_labels = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+# run_times = get_degree_results()
+# for i, kernel_name in enumerate(Graph_Kernel_List):
+#	if kernel_name in run_times:
+#		ax3.plot(x_labels, run_times[kernel_name], '.-', label=Legend_Labels[i], color=Colors[i])
+
+# ax3.set_yscale('log', nonposy='clip')
+# ax3.grid(axis='y')
+# ax3.set_xlabel('degrees')
+# ax3.set_ylabel('runtime($s$)')
+# #ax3.set_ylabel('runtime($s$) per pair of graphs')
+# ax3.set_title('(c) degrees')
+# ax3.set_axisbelow(True)
+# ax3.spines['top'].set_visible(False)
+# ax3.spines['bottom'].set_visible(False)
+# ax3.spines['right'].set_visible(False)
+# ax3.spines['left'].set_visible(False)
+# ax3.xaxis.set_ticks_position('none')
+# ax3.yaxis.set_ticks_position('none')
+
+
+# # -------------- Node labels --------------
+# def get_node_label_results():
+#	save_dir = root_dir_criann + '28 cores/synthesized_graphs_num_node_label_alphabet/'
+#	run_times = {}
+#	for kernel_name in Graph_Kernel_List_VSym:
+#		run_times[kernel_name] = []
+#		for num in [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]:
+#			file_name = save_dir + 'run_time.' + kernel_name + '.' + str(num) + '.pkl'
+#			if os.path.isfile(file_name):
+#				run_time = pickle.load(open(file_name, 'rb'))
+#			else:
+#				run_time = 0
+#			run_times[kernel_name].append(run_time)
+#	return run_times
+
+# #	save_dir = root_dir_criann + 'synthesized_graphs_num_node_label_alphabet/'
+# #	run_times = pickle.load(open(save_dir + 'run_times.pkl', 'rb'))
+# #	return run_times
+
+# x_labels = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
+# run_times = get_node_label_results()
+# for i, kernel_name in enumerate(Graph_Kernel_List):
+#	if kernel_name in run_times:
+#		ax4.plot(x_labels[1:], run_times[kernel_name][1:], '.-', label=Legend_Labels[i], color=Colors[i])
+
+# ax4.set_yscale('log', nonposy='clip')
+# ax4.grid(axis='y')
+# ax4.set_xlabel('# of alphabets')
+# ax4.set_ylabel('runtime($s$)')
+# #ax4.set_ylabel('runtime($s$) per pair of graphs')
+# ax4.set_title('(d) alphabet size of vertex labels')
+# ax4.set_axisbelow(True)
+# ax4.spines['top'].set_visible(False)
+# ax4.spines['bottom'].set_visible(False)
+# ax4.spines['right'].set_visible(False)
+# ax4.spines['left'].set_visible(False)
+# ax4.xaxis.set_ticks_position('none')
+# ax4.yaxis.set_ticks_position('none')
+
+
+from matplotlib.lines import Line2D
+# One legend handle per label: dataset i is drawn with Colors[i], so only the
+# first len(labels) colours are needed (Colors holds more entries than that).
+custom_lines = [Line2D([0], [0], color=color, lw=4) for color in Colors[:len(labels)]]
+
+fig.subplots_adjust(bottom=0.135)  # leave room for the legend below the panels
+fig.legend(custom_lines, labels, loc='lower center', ncol=4, frameon=False)
+plt.savefig('stability.real_data.relative_error.eps', format='eps', dpi=300, transparent=True,
+            bbox_inches='tight')
+plt.show()
\ No newline at end of file
diff --git a/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py
new file mode 100644
index 0000000..d05558a
--- /dev/null
+++ b/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Nov  2 16:17:01 2020
+
+@author: ljia
+"""	
+# This script tests the influence of the ratio between node costs and edge costs on the stability of the GED computation, where the base edit costs are [1, 1, 1, 1, 1, 1]. Each GED matrix is computed once (repeats=1) with the BIPARTITE heuristic for different values of `max_num_solutions`.
+
+import os
+import multiprocessing
+import pickle
+import logging
+from gklearn.ged.util import compute_geds
+import numpy as np
+import time
+from utils import get_dataset
+import sys
+
+
+def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial):
+	"""Run one trial of the GED computation and pickle its result and runtime.
+
+	dataset provides ``graphs`` and the label/attribute names; ds_name,
+	max_num_solutions, ratio and trial identify the setting and name the output files.
+	Returns ``(ged_mat, runtime)``, which is ``('error', 0)`` if the computation raised.
+	Relies on the module-level ``save_dir`` defined in the ``__main__`` section.
+	"""
+	save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
+
+	# The dataset passed in by the caller is used as is; it is not reloaded for every trial.
+
+	# Parameters for GED computation.
+	ged_options = {'method': 'BIPARTITE',  # use BIPARTITE heuristic.
+				   # 'initialization_method': 'RANDOM',  # or 'NODE', etc. (for GEDEnv)
+				   'lsape_model': 'ECBP',
+				   # the actual number of computed solutions might be smaller than the specified value
+				   'max_num_solutions': max_num_solutions,
+				   'edit_cost': 'CONSTANT',  # use CONSTANT cost.
+				   'greedy_method': 'BASIC',
+				   # the distance between non-symbolic node/edge labels is computed by euclidean distance.
+				   'attr_distance': 'euclidean',
+				   'optimal': True, # if TRUE, the option --greedy-method has no effect
+				   # parallel threads. Do not work if mpg_options['parallel'] = False.
+				   'threads': multiprocessing.cpu_count(),
+				   'centrality_method': 'NONE',
+				   'centrality_weight': 0.7,
+				   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
+				   }
+
+	# The first three edit cost constants are scaled by the ratio, the last three stay at 1.
+	edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
+
+	options = ged_options.copy()
+	options['edit_cost_constants'] = edit_cost_constants
+	options['node_labels'] = dataset.node_labels
+	options['edge_labels'] = dataset.edge_labels
+	options['node_attrs'] = dataset.node_attrs
+	options['edge_attrs'] = dataset.edge_attrs
+	parallel = True
+
+	# Compute the GED matrix; 'error' and 0 are kept when the computation fails.
+	ged_mat = 'error'
+	runtime = 0
+	try:
+		time0 = time.time()
+		_, ged_mat, _ = compute_geds(dataset.graphs, options=options, repeats=1, parallel=parallel, verbose=True)
+		runtime = time.time() - time0
+	except Exception as exp:
+		print('An exception occured when running this experiment:')
+		LOG_FILENAME = save_dir + 'error.txt'
+		logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
+		logging.exception(save_file_suffix)
+		print(repr(exp))
+
+	# Save the results of this trial.
+	with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(ged_mat, f)
+	with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(runtime, f)
+
+	return ged_mat, runtime
+
+	
+def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio):
+	"""Run trials 1 to 100 of one setting and save all GED matrices and all runtimes as one group."""
+	ged_mats, runtimes = [], []
+	for trial in range(1, 101):
+		print()
+		print('Trial:', trial)
+		ged_mat, runtime = xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial)
+		ged_mats.append(ged_mat)
+		runtimes.append(runtime)
+
+	save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio)
+	with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f:
+		np.save(f, np.array(ged_mats))
+	with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(runtimes, f)  # the runtimes of all trials, not only the last one
+	
+	
+def results_for_a_dataset(ds_name):
+	"""Load the dataset named ds_name once and run every (max # of solutions, ratio) setting on it."""
+	dataset = get_dataset(ds_name)
+	ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+	for max_num_solutions in [1, 20, 40, 60, 80, 100]:
+		print()
+		print('Max # of solutions:', max_num_solutions)
+		for ratio in ratio_list:
+			print()
+			print('Ratio:', ratio)
+			save_trials_as_group(dataset, ds_name, max_num_solutions, ratio)
+		
+
+if __name__ == '__main__':
+	# Dataset names may be given on the command line; by default all four are used.
+	ds_name_list = sys.argv[1:] if len(sys.argv) > 1 else ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
+
+	# Output folder; the functions above read this module-level name.
+	save_dir = 'outputs/edit_costs.max_num_sols.ratios.bipartite/'
+	# Create the output folder and its 'groups/' sub-folder when missing.
+	for folder in (save_dir, save_dir + 'groups/'):
+		if not os.path.exists(folder):
+			os.makedirs(folder)
+
+	for ds_name in ds_name_list:
+		# Empty line in front of each dataset header.
+		print()
+		print('Dataset:', ds_name)
+		results_for_a_dataset(ds_name)
\ No newline at end of file
diff --git a/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py
index ed7eb2d..4a3c0da 100644
--- a/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py
+++ b/gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py
@@ -11,41 +11,16 @@ import os
 import multiprocessing
 import pickle
 import logging
-from gklearn.utils import Dataset
 from gklearn.ged.util import compute_geds
+import numpy as np
+import time
+from utils import get_dataset
+import sys
 
 
-def get_dataset(ds_name):
-	# The node/edge labels that will not be used in the computation.
-	if ds_name == 'MAO':
-		irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
-	elif ds_name == 'Monoterpenoides':
-		irrelevant_labels = {'edge_labels': ['valence']}
-	elif ds_name == 'MUTAG':
-		irrelevant_labels = {'edge_labels': ['label_0']}
-	elif ds_name == 'AIDS_symb':
-		irrelevant_labels = {'node_attrs': ['chem', 'charge', 'x', 'y'], 'edge_labels': ['valence']}
+def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):
 
-	# Initialize a Dataset.
-	dataset = Dataset()
-	# Load predefined dataset.
-	dataset.load_predefined_dataset(ds_name)
-	# Remove irrelevant labels.
-	dataset.remove_labels(**irrelevant_labels)
-	print('dataset size:', len(dataset.graphs))
-	return dataset
-
-
-def xp_compute_ged_matrix(ds_name, num_solutions, ratio, trial):
-
-	save_dir = 'outputs/edit_costs.num_sols.ratios.IPFP/'
-	if not os.path.exists(save_dir):
-		os.makedirs(save_dir)
-		
 	save_file_suffix = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
-	
-	"""**1.   Get dataset.**"""
-	dataset = get_dataset(ds_name)
 
 	"""**2.  Set parameters.**"""
 
@@ -77,31 +52,71 @@ def xp_compute_ged_matrix(ds_name, num_solutions, ratio, trial):
 	
 	"""**5.   Compute GED matrix.**"""
 	ged_mat = 'error'
+	runtime = 0
 	try:
+		time0 = time.time()
 		ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, options=options, parallel=parallel, verbose=True)
+		runtime = time.time() - time0
 	except Exception as exp:
 		print('An exception occured when running this experiment:')
 		LOG_FILENAME = save_dir + 'error.txt'
 		logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
-		logging.exception('save_file_suffix')
+		logging.exception(save_file_suffix)
 		print(repr(exp))
 					
 	"""**6. Get results.**"""
 	
-	pickle.dump(ged_mat, open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb'))
+	with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(ged_mat, f)
+	with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(runtime, f)
+	
+	return ged_mat, runtime
+	
+	
+def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
+	"""Run trials 1 to 100 of one setting and save all GED matrices and all runtimes as one group."""
+	ged_mats, runtimes = [], []
+	for trial in range(1, 101):
+		print()
+		print('Trial:', trial)
+		ged_mat, runtime = xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial)
+		ged_mats.append(ged_mat)
+		runtimes.append(runtime)
 		
+	save_file_suffix = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio)
+	with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f:
+		np.save(f, np.array(ged_mats))
+	with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(runtimes, f)  # the runtimes of all trials, not only the last one
+		
+		
+def results_for_a_dataset(ds_name):
+	"""Load the dataset named ds_name once and run every (# of solutions, ratio) setting on it."""
+	dataset = get_dataset(ds_name)
+	ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+	for num_solutions in [1, 20, 40, 60, 80, 100]:
+		print()
+		print('# of solutions:', num_solutions)
+		for ratio in ratio_list:
+			print()
+			print('Ratio:', ratio)
+			save_trials_as_group(dataset, ds_name, num_solutions, ratio)
+				
 
 if __name__ == '__main__':
-	for ds_name in ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']:
+	if len(sys.argv) > 1:  # dataset names given on the command line
+		ds_name_list = sys.argv[1:]
+	else:
+		ds_name_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
+	# Output folder; the functions above read this module-level name.
+	save_dir = 'outputs/edit_costs.num_sols.ratios.IPFP/'
+	if not os.path.exists(save_dir):
+		os.makedirs(save_dir)
+	if not os.path.exists(save_dir + 'groups/'):
+		os.makedirs(save_dir + 'groups/')
+	# Run the experiment for each requested dataset.
+	for ds_name in ds_name_list:
 		print()
 		print('Dataset:', ds_name)
-		for num_solutions in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
-			print()
-			print('# of solutions:', num_solutions)
-			for ratio in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
-				print()
-				print('Ratio:', ratio)
-				for trial in range(1, 101):
-					print()
-					print('Trial:', trial)
-					xp_compute_ged_matrix(ds_name, num_solutions, ratio, trial)
\ No newline at end of file
+		results_for_a_dataset(ds_name)
diff --git a/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py
new file mode 100644
index 0000000..5b4576b
--- /dev/null
+++ b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Oct  20 17:48:02 2020
+
+@author: ljia
+"""	
+# This script tests the influence of the ratio between node costs and edge costs on the stability of the GED computation, where the base edit costs are [1, 1, 1, 1, 1, 1]. For each setting, the minimum solution over the given number of repeats is computed.
+
+import os
+import multiprocessing
+import pickle
+import logging
+from gklearn.ged.util import compute_geds
+import numpy as np
+import time
+from utils import get_dataset
+import sys
+
+
+def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):
+	"""Run one trial of the GED computation and pickle its result and runtime.
+
+	dataset provides ``graphs`` and the label/attribute names; ds_name, repeats,
+	ratio and trial identify the setting and name the output files.
+	Returns ``(ged_mat, runtime)``, which is ``('error', 0)`` if the computation raised.
+	Relies on the module-level ``save_dir`` defined in the ``__main__`` section.
+	"""
+	save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
+
+	# The dataset passed in by the caller is used as is; it is not reloaded for every trial.
+
+	# Parameters for GED computation.
+	ged_options = {'method': 'IPFP',  # use IPFP heuristic.
+				   'initialization_method': 'RANDOM',  # or 'NODE', etc.
+				   # when bigger than 1, then the method is considered mIPFP.
+				   'initial_solutions': 1,
+				   'edit_cost': 'CONSTANT',  # use CONSTANT cost.
+				   # the distance between non-symbolic node/edge labels is computed by euclidean distance.
+				   'attr_distance': 'euclidean',
+				   'ratio_runs_from_initial_solutions': 1,
+				   # parallel threads. Do not work if mpg_options['parallel'] = False.
+				   'threads': multiprocessing.cpu_count(),
+				   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
+				   }
+
+	# The first three edit cost constants are scaled by the ratio, the last three stay at 1.
+	edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
+
+	options = ged_options.copy()
+	options['edit_cost_constants'] = edit_cost_constants
+	options['node_labels'] = dataset.node_labels
+	options['edge_labels'] = dataset.edge_labels
+	options['node_attrs'] = dataset.node_attrs
+	options['edge_attrs'] = dataset.edge_attrs
+	parallel = True
+
+	# Compute the GED matrix; 'error' and 0 are kept when the computation fails.
+	ged_mat = 'error'
+	runtime = 0
+	try:
+		time0 = time.time()
+		_, ged_mat, _ = compute_geds(dataset.graphs, options=options, repeats=repeats, parallel=parallel, verbose=True)
+		runtime = time.time() - time0
+	except Exception as exp:
+		print('An exception occured when running this experiment:')
+		LOG_FILENAME = save_dir + 'error.txt'
+		logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
+		logging.exception(save_file_suffix)
+		print(repr(exp))
+
+	# Save the results of this trial.
+	with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(ged_mat, f)
+	with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(runtime, f)
+	return ged_mat, runtime
+
+	
+def save_trials_as_group(dataset, ds_name, repeats, ratio):
+	"""Run trials 1 to 100 of one setting and save all GED matrices and all runtimes as one group."""
+	ged_mats, runtimes = [], []
+	for trial in range(1, 101):
+		print()
+		print('Trial:', trial)
+		ged_mat, runtime = xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial)
+		ged_mats.append(ged_mat)
+		runtimes.append(runtime)
+
+	save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio)
+	with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f:
+		np.save(f, np.array(ged_mats))
+	with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(runtimes, f)  # the runtimes of all trials, not only the last one
+	
+	
+def results_for_a_dataset(ds_name):
+	"""Load the dataset named ds_name once and run every (repeats, ratio) setting on it."""
+	dataset = get_dataset(ds_name)
+	ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+	for repeats in [1, 20, 40, 60, 80, 100]:
+		print()
+		print('Repeats:', repeats)
+		for ratio in ratio_list:
+			print()
+			print('Ratio:', ratio)
+			save_trials_as_group(dataset, ds_name, repeats, ratio)
+		
+
+if __name__ == '__main__':
+	# Dataset names may be given on the command line; by default all four are used.
+	ds_name_list = sys.argv[1:] if len(sys.argv) > 1 else ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
+
+	# Output folder; the functions above read this module-level name.
+	save_dir = 'outputs/edit_costs.repeats.ratios.IPFP/'
+	# Create the output folder and its 'groups/' sub-folder when missing.
+	for folder in (save_dir, save_dir + 'groups/'):
+		if not os.path.exists(folder):
+			os.makedirs(folder)
+
+	for ds_name in ds_name_list:
+		# Empty line in front of each dataset header.
+		print()
+		print('Dataset:', ds_name)
+		results_for_a_dataset(ds_name)
diff --git a/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py
new file mode 100644
index 0000000..f6ecd99
--- /dev/null
+++ b/gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Oct  20 17:48:02 2020
+
+@author: ljia
+"""	
+# This script tests the influence of the ratio between node costs and edge costs on the stability of the GED computation, where the base edit costs are [1, 1, 1, 1, 1, 1]. For each setting, the minimum solution over the given number of repeats is computed.
+
+import os
+import multiprocessing
+import pickle
+import logging
+from gklearn.ged.util import compute_geds
+import numpy as np
+import time
+from utils import get_dataset
+import sys
+
+
+def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):
+	"""Run one trial of the GED computation and pickle its result and runtime.
+
+	dataset provides ``graphs`` and the label/attribute names; ds_name, repeats,
+	ratio and trial identify the setting and name the output files.
+	Returns ``(ged_mat, runtime)``, which is ``('error', 0)`` if the computation raised.
+	Relies on the module-level ``save_dir`` defined in the ``__main__`` section.
+	"""
+	save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
+
+	# The dataset passed in by the caller is used as is; it is not reloaded for every trial.
+
+	# Parameters for GED computation.
+	ged_options = {'method': 'BIPARTITE',  # use BIPARTITE heuristic.
+				   # 'initialization_method': 'RANDOM',  # or 'NODE', etc. (for GEDEnv)
+				   'lsape_model': 'ECBP',
+				   # the actual number of computed solutions might be smaller than the specified value
+				   'max_num_solutions': 1,
+				   'edit_cost': 'CONSTANT',  # use CONSTANT cost.
+				   'greedy_method': 'BASIC',
+				   # the distance between non-symbolic node/edge labels is computed by euclidean distance.
+				   'attr_distance': 'euclidean',
+				   'optimal': True, # if TRUE, the option --greedy-method has no effect
+				   # parallel threads. Do not work if mpg_options['parallel'] = False.
+				   'threads': multiprocessing.cpu_count(),
+				   'centrality_method': 'NONE',
+				   'centrality_weight': 0.7,
+				   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
+				   }
+
+	# The first three edit cost constants are scaled by the ratio, the last three stay at 1.
+	edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
+
+	options = ged_options.copy()
+	options['edit_cost_constants'] = edit_cost_constants
+	options['node_labels'] = dataset.node_labels
+	options['edge_labels'] = dataset.edge_labels
+	options['node_attrs'] = dataset.node_attrs
+	options['edge_attrs'] = dataset.edge_attrs
+	parallel = True
+
+	# Compute the GED matrix; 'error' and 0 are kept when the computation fails.
+	ged_mat = 'error'
+	runtime = 0
+	try:
+		time0 = time.time()
+		_, ged_mat, _ = compute_geds(dataset.graphs, options=options, repeats=repeats, parallel=parallel, verbose=True)
+		runtime = time.time() - time0
+	except Exception as exp:
+		print('An exception occured when running this experiment:')
+		LOG_FILENAME = save_dir + 'error.txt'
+		logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
+		logging.exception(save_file_suffix)
+		print(repr(exp))
+
+	# Save the results of this trial.
+	with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(ged_mat, f)
+	with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(runtime, f)
+
+	return ged_mat, runtime
+
+	
+def save_trials_as_group(dataset, ds_name, repeats, ratio):
+	"""Run trials 1 to 100 of one setting and save all GED matrices and all runtimes as one group."""
+	ged_mats, runtimes = [], []
+	for trial in range(1, 101):
+		print()
+		print('Trial:', trial)
+		ged_mat, runtime = xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial)
+		ged_mats.append(ged_mat)
+		runtimes.append(runtime)
+
+	save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio)
+	with open(save_dir + 'groups/ged_mats' + save_file_suffix + '.npy', 'wb') as f:
+		np.save(f, np.array(ged_mats))
+	with open(save_dir + 'groups/runtimes' + save_file_suffix + '.pkl', 'wb') as f:
+		pickle.dump(runtimes, f)  # the runtimes of all trials, not only the last one
+	
+	
+def results_for_a_dataset(ds_name):
+	"""Load the dataset named ds_name once and run every (repeats, ratio) setting on it."""
+	dataset = get_dataset(ds_name)
+	ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
+	for repeats in [1, 20, 40, 60, 80, 100]:
+		print()
+		print('Repeats:', repeats)
+		for ratio in ratio_list:
+			print()
+			print('Ratio:', ratio)
+			save_trials_as_group(dataset, ds_name, repeats, ratio)
+		
+
+if __name__ == '__main__':
+	# Dataset names may be given on the command line; by default all four are used.
+	ds_name_list = sys.argv[1:] if len(sys.argv) > 1 else ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
+
+	# Output folder; the functions above read this module-level name.
+	save_dir = 'outputs/edit_costs.repeats.ratios.bipartite/'
+	# Create the output folder and its 'groups/' sub-folder when missing.
+	for folder in (save_dir, save_dir + 'groups/'):
+		if not os.path.exists(folder):
+			os.makedirs(folder)
+
+	for ds_name in ds_name_list:
+		# Empty line in front of each dataset header.
+		print()
+		print('Dataset:', ds_name)
+		results_for_a_dataset(ds_name)
\ No newline at end of file
diff --git a/gklearn/experiments/ged/stability/group_results.py b/gklearn/experiments/ged/stability/group_results.py
new file mode 100644
index 0000000..48ea68d
--- /dev/null
+++ b/gklearn/experiments/ged/stability/group_results.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Oct 29 17:26:43 2020
+
+@author: ljia
+
+This script groups results together into a single file for the sake of faster 
+searching and loading.
+"""
+import os
+import pickle
+import numpy as np
+from shutil import copyfile
+from tqdm import tqdm
+import sys
+
+
+def group_trials(dir_folder, name_prefix, override, clear, backup):
+	"""Merge the per-trial pickle files sharing `name_prefix` into one group file.
+
+	Trials are numbered 1 to 100. The group file goes to `dir_folder + 'groups/'`:
+	'ged_matrix' data is saved as a .npy array, anything else as a pickled list.
+	If a trial file is missing while the group is being built, nothing is written.
+
+	Parameters
+	----------
+	dir_folder : string
+		Folder holding the trial files; it is concatenated directly with file names.
+	name_prefix : string
+		Trial file name up to, but excluding, 'trial_<number>.pkl'.
+	override : boolean
+		Rebuild the group file even when it already exists.
+	clear : boolean
+		Delete the trial files once all of them are known to exist.
+	backup : boolean
+		Copy the trial files to `dir_folder + 'backups/'` before any deletion.
+	"""
+	# Get group name; the label is the text before the first '.' of the prefix.
+	label_name = name_prefix.split('.')[0]
+	is_ged_matrix = (label_name == 'ged_matrix')
+	group_label = {'ged_matrix': 'ged_mats', 'runtime': 'runtimes'}.get(label_name, label_name)
+	extension = 'npy' if is_ged_matrix else 'pkl'
+	name_group = dir_folder + 'groups/' + group_label + name_prefix[len(label_name):] + extension
+	trial_names = [name_prefix + 'trial_' + str(trial) + '.pkl' for trial in range(1, 101)]
+	trial_paths = [dir_folder + name for name in trial_names]
+
+	if not override and os.path.isfile(name_group):
+		# The group exists already: only check if all trial files exist.
+		trials_complete = all(os.path.isfile(path) for path in trial_paths)
+	else:
+		# Get data, giving up as soon as one trial is missing.
+		data_group = []
+		for path in trial_paths:
+			if not os.path.isfile(path):
+				return
+			with open(path, 'rb') as f:
+				data_group.append(pickle.load(f))
+
+		# Write groups; the array is built before the file is opened.
+		if is_ged_matrix:
+			data_group = np.array(data_group)
+		with open(name_group, 'wb') as f:
+			if is_ged_matrix:
+				np.save(f, data_group)
+			else:
+				pickle.dump(data_group, f)
+		trials_complete = True
+
+	if not trials_complete:
+		return
+	# Back up the trial files first, then clear them.
+	if backup:
+		for path, name in zip(trial_paths, trial_names):
+			copyfile(path, dir_folder + 'backups/' + name)
+	if clear:
+		for path in trial_paths:
+			os.remove(path)
+
+
+def group_all_in_folder(dir_folder, override=False, clear=True, backup=True):
+	"""Group the trial files lying directly in `dir_folder`, calling `group_trials` per prefix."""
+	# Create folders.
+	needed_folders = [dir_folder + 'groups/']
+	if backup:
+		needed_folders.append(dir_folder + 'backups')
+	for folder in needed_folders:
+		if not os.path.exists(folder):
+			os.makedirs(folder)
+
+	# Iterate all files; `group_trials` runs only when the prefix differs from the previous one.
+	last_prefix = ''
+	for entry in tqdm(sorted(os.listdir(dir_folder)), desc='Grouping', file=sys.stdout):
+		if not os.path.isfile(os.path.join(dir_folder, entry)):
+			continue
+		prefix = entry.split('trial_')[0]
+		if prefix != last_prefix:
+			group_trials(dir_folder, prefix, override, clear, backup)
+			last_prefix = prefix
+	
+		
+
+if __name__ == '__main__':
+	# Group the trial results of each experiment output folder in turn.
+	for dir_folder in [
+			'outputs/CRIANN/edit_costs.num_sols.ratios.IPFP/',
+			'outputs/CRIANN/edit_costs.repeats.ratios.IPFP/']:
+		group_all_in_folder(dir_folder)
\ No newline at end of file
diff --git a/gklearn/experiments/ged/stability/utils.py b/gklearn/experiments/ged/stability/utils.py
new file mode 100644
index 0000000..5feaba4
--- /dev/null
+++ b/gklearn/experiments/ged/stability/utils.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Oct 29 19:17:36 2020
+
+@author: ljia
+"""
+from gklearn.utils import Dataset
+
+
+def get_dataset(ds_name):
+	"""Return predefined dataset `ds_name` ('AIDS_symb' loads 'AIDS') without its unused labels.
+
+	Names without an entry below are loaded with all of their labels kept.
+	"""
+	# The node/edge labels that will not be used in the computation.
+	irrelevant_labels = {
+		'MAO': {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']},
+		'Monoterpenoides': {'edge_labels': ['valence']},
+		'MUTAG': {'edge_labels': ['label_0']},
+		'AIDS_symb': {'node_attrs': ['chem', 'charge', 'x', 'y'], 'edge_labels': ['valence']},
+	}.get(ds_name, {})
+	if ds_name == 'AIDS_symb':
+		ds_name = 'AIDS'
+	dataset = Dataset()
+	dataset.load_predefined_dataset(ds_name)
+	if irrelevant_labels:  # Names without an entry have nothing to remove.
+		dataset.remove_labels(**irrelevant_labels)
+	print('dataset size:', len(dataset.graphs))
+	return dataset
\ No newline at end of file