
Add experiments: GED stability on synthesized graphs.

v0.2.x
jajupmochi 4 years ago
parent
commit
d3daa95141
12 changed files with 838 additions and 29 deletions
  1. +142 -0  gklearn/experiments/ged/stability/edit_costs.max_num_sols.N.bipartite.py
  2. +25 -7  gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py
  3. +137 -0  gklearn/experiments/ged/stability/edit_costs.nums_sols.N.IPFP.py
  4. +5 -5  gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py
  5. +137 -0  gklearn/experiments/ged/stability/edit_costs.repeats.N.IPFP.py
  6. +142 -0  gklearn/experiments/ged/stability/edit_costs.repeats.N.bipartite.py
  7. +26 -8  gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py
  8. +27 -9  gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py
  9. +56 -0  gklearn/experiments/ged/stability/run_job_edit_costs.N.py
  10. +47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.max_nums_sols.ratios.bipartite.py
  11. +47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.repeats.ratios.IPFP.py
  12. +47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.repeats.ratios.bipartite.py
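As context for the diffs below, here is a condensed sketch of the single measurement each new experiment script repeats: synthesize unlabeled graphs, scale the three node edit costs by the ratio under test, and time one GED-matrix computation. It mirrors the calls used in the scripts (the IPFP variant is shown; the *.bipartite.py scripts use the 'BIPARTITE' method with its own options); this condensed driver itself is illustrative only and not part of the commit.

# Illustrative sketch of one stability trial; parameter values are examples.
import time
import multiprocessing
from gklearn.utils.graph_synthesizer import GraphSynthesizer
from gklearn.ged.util import compute_geds

# Synthesize unlabeled, undirected graphs, as in generate_graphs() below.
graphs = GraphSynthesizer().unified_graphs(
	num_graphs=10, num_nodes=20, num_edges=20,
	num_node_labels=0, num_edge_labels=0, seed=None, directed=False)

ratio = 10  # node-cost / edge-cost ratio under test
options = {
	'method': 'IPFP',
	'initialization_method': 'RANDOM',
	'initial_solutions': 1,
	'ratio_runs_from_initial_solutions': 1,
	'edit_cost': 'CONSTANT',
	'attr_distance': 'euclidean',
	'threads': multiprocessing.cpu_count(),
	'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES',
	# Node edit costs scaled by the ratio, edge costs left at 1 (presumed CONSTANT cost order).
	'edit_cost_constants': [ratio, ratio, ratio, 1, 1, 1],
	'node_labels': [], 'edge_labels': [],
	'node_attrs': [], 'edge_attrs': [],
}

time0 = time.time()
ged_vec, ged_mat, n_edit_ops = compute_geds(
	graphs, options=options, repeats=1, parallel=True, verbose=False)
print('runtime:', time.time() - time0)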

+142 -0  gklearn/experiments/ged/stability/edit_costs.max_num_sols.N.bipartite.py

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of graphs (N) and the maximum number of solutions on the stability of GED computation with the BIPARTITE heuristic, under several ratios between node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys
from group_results import group_trials


def generate_graphs():
from gklearn.utils.graph_synthesizer import GraphSynthesizer
gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
return graphs


def xp_compute_ged_matrix(graphs, N, max_num_solutions, ratio, trial):

save_file_suffix = '.' + str(N) + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

# Parameters for GED computation.
ged_options = {'method': 'BIPARTITE', # use BIPARTITE heuristic.
# 'initialization_method': 'RANDOM', # or 'NODE', etc. (for GEDEnv)
'lsape_model': 'ECBP', #
# (to verify) when bigger than 1, the method is considered mIPFP.
# the actual number of computed solutions might be smaller than the specified value
'max_num_solutions': max_num_solutions,
'edit_cost': 'CONSTANT', # use CONSTANT cost.
'greedy_method': 'BASIC', #
# the distance between non-symbolic node/edge labels is computed by euclidean distance.
'attr_distance': 'euclidean',
'optimal': True, # if TRUE, the option --greedy-method has no effect
# parallel threads; has no effect if parallel = False.
'threads': multiprocessing.cpu_count(),
'centrality_method': 'NONE',
'centrality_weight': 0.7,
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
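# Presumed layout of the 'CONSTANT' cost vector: [node_ins, node_del, node_sub, edge_ins, edge_del, edge_sub];
# the three node costs are scaled by the tested ratio while the edge costs stay at 1.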
edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
# edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

options = ged_options.copy()
options['edit_cost_constants'] = edit_cost_constants
options['node_labels'] = []
options['edge_labels'] = []
options['node_attrs'] = []
options['edge_attrs'] = []
parallel = True # if num_solutions == 1 else False
"""**5. Compute GED matrix.**"""
ged_mat = 'error'
runtime = 0
try:
time0 = time.time()
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=1, parallel=parallel, verbose=True)
runtime = time.time() - time0
except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception(save_file_suffix)
print(repr(exp))
"""**6. Get results.**"""
with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(ged_mat, f)
with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(runtime, f)

return ged_mat, runtime

def save_trials_as_group(graphs, N, max_num_solutions, ratio):
# Return if the group file exists.
name_middle = '.' + str(N) + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
print()
print('Trial:', trial)
ged_mat, runtime = xp_compute_ged_matrix(graphs, N, max_num_solutions, ratio, trial)
ged_mats.append(ged_mat)
runtimes.append(runtime)
# Group trials and Remove single files.
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
for N in N_list:
print()
print('# of graphs:', N)
for max_num_solutions in [1, 20, 40, 60, 80, 100]:
print()
print('Max # of solutions:', max_num_solutions)
save_trials_as_group(graphs[:N], N, max_num_solutions, ratio)

if __name__ == '__main__':
if len(sys.argv) > 1:
N_list = [int(i) for i in sys.argv[1:]]
else:
N_list = [10, 50, 100]
# Generate graphs.
graphs = generate_graphs()
save_dir = 'outputs/edit_costs.max_num_sols.N.bipartite/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir + 'groups/', exist_ok=True)
for ratio in [10, 1, 0.1]:
print()
print('Ratio:', ratio)
results_for_a_ratio(ratio)

+25 -7  gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py

@@ -22,8 +22,9 @@ def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial):
save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)
# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

@@ -83,6 +84,12 @@ def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial):

def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio):
# Return if the group file exists.
name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
@@ -93,25 +100,35 @@ def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio):
runtimes.append(runtime)
# Group trials and Remove single files.
name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
def results_for_a_dataset(ds_name):
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)
for max_num_solutions in [1, 20, 40, 60, 80, 100]:
for max_num_solutions in mnum_solutions_list:
print()
print('Max # of solutions:', max_num_solutions)
for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
for ratio in ratio_list:
print()
print('Ratio:', ratio)
save_trials_as_group(dataset, ds_name, max_num_solutions, ratio)
def get_param_lists(ds_name):
if ds_name == 'AIDS_symb':
mnum_solutions_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
else:
mnum_solutions_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
return mnum_solutions_list, ratio_list
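# Both branches currently return identical lists; the split presumably leaves room for dataset-specific settings later.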

if __name__ == '__main__':
if len(sys.argv) > 1:
@@ -126,4 +143,5 @@ if __name__ == '__main__':
for ds_name in ds_name_list:
print()
print('Dataset:', ds_name)
mnum_solutions_list, ratio_list = get_param_lists(ds_name)
results_for_a_dataset(ds_name)

+137 -0  gklearn/experiments/ged/stability/edit_costs.nums_sols.N.IPFP.py

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of graphs (N) and the number of solutions on the stability of GED computation with the IPFP heuristic, under several ratios between node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys
from group_results import group_trials


def generate_graphs():
from gklearn.utils.graph_synthesizer import GraphSynthesizer
gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
return graphs


def xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial):

save_file_suffix = '.' + str(N) + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

# Parameters for GED computation.
ged_options = {'method': 'IPFP', # use IPFP heuristic.
'initialization_method': 'RANDOM', # or 'NODE', etc.
# when bigger than 1, then the method is considered mIPFP.
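# With 'ratio_runs_from_initial_solutions' set to 0.25 below, 4 * num_solutions initial solutions
# presumably amount to about num_solutions IPFP runs per graph pair.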
'initial_solutions': int(num_solutions * 4),
'edit_cost': 'CONSTANT', # use CONSTANT cost.
# the distance between non-symbolic node/edge labels is computed by euclidean distance.
'attr_distance': 'euclidean',
'ratio_runs_from_initial_solutions': 0.25,
# parallel threads; has no effect if parallel = False.
'threads': multiprocessing.cpu_count(),
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
# edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

options = ged_options.copy()
options['edit_cost_constants'] = edit_cost_constants
options['node_labels'] = []
options['edge_labels'] = []
options['node_attrs'] = []
options['edge_attrs'] = []
parallel = True # if num_solutions == 1 else False
"""**5. Compute GED matrix.**"""
ged_mat = 'error'
runtime = 0
try:
time0 = time.time()
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=1, parallel=parallel, verbose=True)
runtime = time.time() - time0
except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception(save_file_suffix)
print(repr(exp))
"""**6. Get results.**"""
with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(ged_mat, f)
with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(runtime, f)

return ged_mat, runtime

def save_trials_as_group(graphs, N, num_solutions, ratio):
# Return if the group file exists.
name_middle = '.' + str(N) + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
print()
print('Trial:', trial)
ged_mat, runtime = xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial)
ged_mats.append(ged_mat)
runtimes.append(runtime)
# Group trials and Remove single files.
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
for N in N_list:
print()
print('# of graphs:', N)
for num_solutions in [1, 20, 40, 60, 80, 100]:
print()
print('# of solutions:', num_solutions)
save_trials_as_group(graphs[:N], N, num_solutions, ratio)

if __name__ == '__main__':
if len(sys.argv) > 1:
N_list = [int(i) for i in sys.argv[1:]]
else:
N_list = [10, 50, 100]
# Generate graphs.
graphs = generate_graphs()
save_dir = 'outputs/edit_costs.num_sols.N.IPFP/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir + 'groups/', exist_ok=True)
for ratio in [10, 1, 0.1]:
print()
print('Ratio:', ratio)
results_for_a_ratio(ratio)

+5 -5  gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py

@@ -59,7 +59,7 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):
runtime = 0
try:
time0 = time.time()
ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, options=options, parallel=parallel, verbose=True)
ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, options=options, repeats=1, parallel=parallel, verbose=True)
runtime = time.time() - time0
except Exception as exp:
print('An exception occurred when running this experiment:')
@@ -74,9 +74,9 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):
pickle.dump(ged_mat, f)
with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(runtime, f)
return ged_mat, runtime
def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
# Return if the group file exists.
@@ -99,8 +99,8 @@ def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
def results_for_a_dataset(ds_name):
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)


+137 -0  gklearn/experiments/ged/stability/edit_costs.repeats.N.IPFP.py

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of graphs (N) and the number of repeats on the stability of GED computation with the IPFP heuristic, under several ratios between node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys
from group_results import group_trials


def generate_graphs():
from gklearn.utils.graph_synthesizer import GraphSynthesizer
gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
return graphs


def xp_compute_ged_matrix(graphs, N, repeats, ratio, trial):

save_file_suffix = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

# Parameters for GED computation.
ged_options = {'method': 'IPFP', # use IPFP heuristic.
'initialization_method': 'RANDOM', # or 'NODE', etc.
# when bigger than 1, then the method is considered mIPFP.
'initial_solutions': 1,
'edit_cost': 'CONSTANT', # use CONSTANT cost.
# the distance between non-symbolic node/edge labels is computed by euclidean distance.
'attr_distance': 'euclidean',
'ratio_runs_from_initial_solutions': 1,
# parallel threads; has no effect if parallel = False.
'threads': multiprocessing.cpu_count(),
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
# edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

options = ged_options.copy()
options['edit_cost_constants'] = edit_cost_constants
options['node_labels'] = []
options['edge_labels'] = []
options['node_attrs'] = []
options['edge_attrs'] = []
parallel = True # if num_solutions == 1 else False
"""**5. Compute GED matrix.**"""
ged_mat = 'error'
runtime = 0
try:
time0 = time.time()
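# 'repeats' is the parameter under study here: each pairwise GED is presumably recomputed
# 'repeats' times and the best result kept.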
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=repeats, parallel=parallel, verbose=True)
runtime = time.time() - time0
except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception(save_file_suffix)
print(repr(exp))
"""**6. Get results.**"""
with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(ged_mat, f)
with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(runtime, f)

return ged_mat, runtime

def save_trials_as_group(graphs, N, repeats, ratio):
# Return if the group file exists.
name_middle = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
print()
print('Trial:', trial)
ged_mat, runtime = xp_compute_ged_matrix(graphs, N, repeats, ratio, trial)
ged_mats.append(ged_mat)
runtimes.append(runtime)
# Group trials and Remove single files.
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
for N in N_list:
print()
print('# of graphs:', N)
for repeats in [1, 20, 40, 60, 80, 100]:
print()
print('Repeats:', repeats)
save_trials_as_group(graphs[:N], N, repeats, ratio)

if __name__ == '__main__':
if len(sys.argv) > 1:
N_list = [int(i) for i in sys.argv[1:]]
else:
N_list = [10, 50, 100]
# Generate graphs.
graphs = generate_graphs()
save_dir = 'outputs/edit_costs.repeats.N.IPFP/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir + 'groups/', exist_ok=True)
for ratio in [10, 1, 0.1]:
print()
print('Ratio:', ratio)
results_for_a_ratio(ratio)

+142 -0  gklearn/experiments/ged/stability/edit_costs.repeats.N.bipartite.py

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of graphs (N) and the number of repeats on the stability of GED computation with the BIPARTITE heuristic, under several ratios between node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys
from group_results import group_trials


def generate_graphs():
from gklearn.utils.graph_synthesizer import GraphSynthesizer
gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
return graphs


def xp_compute_ged_matrix(graphs, N, repeats, ratio, trial):

save_file_suffix = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

# Parameters for GED computation.
ged_options = {'method': 'BIPARTITE', # use BIPARTITE heuristic.
# 'initialization_method': 'RANDOM', # or 'NODE', etc. (for GEDEnv)
'lsape_model': 'ECBP', #
# (to verify) when bigger than 1, the method is considered mIPFP.
# the actual number of computed solutions might be smaller than the specified value
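# Fixed to 1 in this experiment; the varied parameter is 'repeats', passed to compute_geds below.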
'max_num_solutions': 1,
'edit_cost': 'CONSTANT', # use CONSTANT cost.
'greedy_method': 'BASIC', #
# the distance between non-symbolic node/edge labels is computed by euclidean distance.
'attr_distance': 'euclidean',
'optimal': True, # if TRUE, the option --greedy-method has no effect
# parallel threads; has no effect if parallel = False.
'threads': multiprocessing.cpu_count(),
'centrality_method': 'NONE',
'centrality_weight': 0.7,
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
# edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

options = ged_options.copy()
options['edit_cost_constants'] = edit_cost_constants
options['node_labels'] = []
options['edge_labels'] = []
options['node_attrs'] = []
options['edge_attrs'] = []
parallel = True # if num_solutions == 1 else False
"""**5. Compute GED matrix.**"""
ged_mat = 'error'
runtime = 0
try:
time0 = time.time()
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=repeats, parallel=parallel, verbose=True)
runtime = time.time() - time0
except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception(save_file_suffix)
print(repr(exp))
"""**6. Get results.**"""
with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(ged_mat, f)
with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(runtime, f)

return ged_mat, runtime

def save_trials_as_group(graphs, N, repeats, ratio):
# Return if the group file exists.
name_middle = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
print()
print('Trial:', trial)
ged_mat, runtime = xp_compute_ged_matrix(graphs, N, repeats, ratio, trial)
ged_mats.append(ged_mat)
runtimes.append(runtime)
# Group trials and Remove single files.
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
for N in N_list:
print()
print('# of graphs:', N)
for repeats in [1, 20, 40, 60, 80, 100]:
print()
print('Repeats:', repeats)
save_trials_as_group(graphs[:N], N, repeats, ratio)

if __name__ == '__main__':
if len(sys.argv) > 1:
N_list = [int(i) for i in sys.argv[1:]]
else:
N_list = [10, 50, 100]
# Generate graphs.
graphs = generate_graphs()
save_dir = 'outputs/edit_costs.repeats.N.bipartite/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir + 'groups/', exist_ok=True)
for ratio in [10, 1, 0.1]:
print()
print('Ratio:', ratio)
results_for_a_ratio(ratio)

+26 -8  gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py

@@ -19,11 +19,12 @@ from group_results import group_trials


def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):
save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)
# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

@@ -78,6 +79,12 @@ def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):

def save_trials_as_group(dataset, ds_name, repeats, ratio):
# Return if the group file exists.
name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
@@ -88,25 +95,35 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio):
runtimes.append(runtime)
# Group trials and Remove single files.
name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
def results_for_a_dataset(ds_name):
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)
for repeats in [1, 20, 40, 60, 80, 100]:
for repeats in repeats_list:
print()
print('Repeats:', repeats)
for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
for ratio in ratio_list:
print()
print('Ratio:', ratio)
save_trials_as_group(dataset, ds_name, repeats, ratio)
def get_param_lists(ds_name):
if ds_name == 'AIDS_symb':
repeats_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
else:
repeats_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
return repeats_list, ratio_list

if __name__ == '__main__':
if len(sys.argv) > 1:
@@ -121,4 +138,5 @@ if __name__ == '__main__':
for ds_name in ds_name_list:
print()
print('Dataset:', ds_name)
repeats_list, ratio_list = get_param_lists(ds_name)
results_for_a_dataset(ds_name)

+27 -9  gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py

@@ -19,11 +19,12 @@ from group_results import group_trials


def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):
save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)

# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

@@ -83,6 +84,12 @@ def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):

def save_trials_as_group(dataset, ds_name, repeats, ratio):
# Return if the group file exists.
name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
@@ -93,25 +100,35 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio):
runtimes.append(runtime)
# Group trials and Remove single files.
name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
def results_for_a_dataset(ds_name):
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)
for repeats in [1, 20, 40, 60, 80, 100]:
for repeats in repeats_list:
print()
print('Repeats:', repeats)
for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
for ratio in ratio_list:
print()
print('Ratio:', ratio)
save_trials_as_group(dataset, ds_name, repeats, ratio)
def get_param_lists(ds_name):
if ds_name == 'AIDS_symb':
repeats_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
else:
repeats_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
return repeats_list, ratio_list

if __name__ == '__main__':
if len(sys.argv) > 1:
@@ -126,4 +143,5 @@ if __name__ == '__main__':
for ds_name in ds_name_list:
print()
print('Dataset:', ds_name)
repeats_list, ratio_list = get_param_lists(ds_name)
results_for_a_dataset(ds_name)

+56 -0  gklearn/experiments/ged/stability/run_job_edit_costs.N.py

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg, params):
ged_method = params[0]
multi_method = params[1]
job_name_label = r"rep." if multi_method == 'repeats' else r""
script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.""" + job_name_label + r"N" + arg + r"." + ged_method + r""""
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.""" + multi_method + r".N." + ged_method + r"." + arg + r""".txt"
#SBATCH --error="errors/error_edit_costs.""" + multi_method + r".N." + ged_method + r"." + arg + r""".txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.""" + multi_method + r".N." + ged_method + r".py " + arg
script = script.strip()
script = re.sub('\n\t+', '\n', script)
script = re.sub('\n +', '\n', script)
return script

if __name__ == '__main__':
params_list = [('IPFP', 'nums_sols'),
('IPFP', 'repeats'),
('bipartite', 'max_num_sols'),
('bipartite', 'repeats')]
N_list = [10, 50, 100]
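# params_list[1:] skips the ('IPFP', 'nums_sols') entry, so only the remaining three experiment types are submitted by this run.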
for params in params_list[1:]:
for N in [N_list[i] for i in [0, 1, 2]]:
job_script = get_job_script(str(N), params)
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# print(command)
os.system(command)
# os.popen(command)
# output = stream.readlines()

+47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.max_nums_sols.ratios.bipartite.py

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg):
script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.""" + arg + r""".bp"
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.max_num_sols.ratios.bipartite.""" + arg + """.txt"
#SBATCH --error="errors/error_edit_costs.max_num_sols.ratios.bipartite.""" + arg + """.txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.max_nums_sols.ratios.bipartite.py """ + arg
script = script.strip()
script = re.sub('\n\t+', '\n', script)
script = re.sub('\n +', '\n', script)
return script

if __name__ == '__main__':
ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
for ds_name in [ds_list[i] for i in [0, 1, 2, 3]]:
job_script = get_job_script(ds_name)
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
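# The generated script is fed to sbatch through a here-document, so no temporary script file is written.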
# print(command)
os.system(command)
# os.popen(command)
# output = stream.readlines()

+47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.repeats.ratios.IPFP.py

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg):
script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.rep.""" + arg + r""".IPFP"
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.repeats.ratios.IPFP.""" + arg + """.txt"
#SBATCH --error="errors/error_edit_costs.repeats.ratios.IPFP.""" + arg + """.txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.repeats.ratios.IPFP.py """ + arg
script = script.strip()
script = re.sub('\n\t+', '\n', script)
script = re.sub('\n +', '\n', script)
return script

if __name__ == '__main__':
ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
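# Indices [0, 3] select only MAO and AIDS_symb for this submission.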
for ds_name in [ds_list[i] for i in [0, 3]]:
job_script = get_job_script(ds_name)
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# print(command)
os.system(command)
# os.popen(command)
# output = stream.readlines()

+47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.repeats.ratios.bipartite.py

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg):
script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.rep.""" + arg + r""".bp"
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.repeats.ratios.bipartite.""" + arg + """.txt"
#SBATCH --error="errors/error_edit_costs.repeats.ratios.bipartite.""" + arg + """.txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.repeats.ratios.bipartite.py """ + arg
script = script.strip()
script = re.sub('\n\t+', '\n', script)
script = re.sub('\n +', '\n', script)
return script

if __name__ == '__main__':
ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
for ds_name in [ds_list[i] for i in [0, 1, 2, 3]]:
job_script = get_job_script(ds_name)
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# print(command)
os.system(command)
# os.popen(command)
# output = stream.readlines()
