
Add experiments: GED stability on synthesized graphs.

v0.2.x
jajupmochi 4 years ago
parent
commit
d3daa95141
12 changed files with 838 additions and 29 deletions
  1. +142 -0  gklearn/experiments/ged/stability/edit_costs.max_num_sols.N.bipartite.py
  2. +25 -7  gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py
  3. +137 -0  gklearn/experiments/ged/stability/edit_costs.nums_sols.N.IPFP.py
  4. +5 -5  gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py
  5. +137 -0  gklearn/experiments/ged/stability/edit_costs.repeats.N.IPFP.py
  6. +142 -0  gklearn/experiments/ged/stability/edit_costs.repeats.N.bipartite.py
  7. +26 -8  gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py
  8. +27 -9  gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py
  9. +56 -0  gklearn/experiments/ged/stability/run_job_edit_costs.N.py
  10. +47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.max_nums_sols.ratios.bipartite.py
  11. +47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.repeats.ratios.IPFP.py
  12. +47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.repeats.ratios.bipartite.py
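As context for the diffs below, here is a condensed sketch of the single measurement each new experiment script repeats: synthesize unlabeled graphs, scale the three node edit costs by the ratio under test, and time one GED-matrix computation. It mirrors the calls used in the scripts (the IPFP variant is shown; the *.bipartite.py scripts use the 'BIPARTITE' method with its own options); this condensed driver itself is illustrative only and not part of the commit.

# Illustrative sketch of one stability trial; parameter values are examples.
import time
import multiprocessing
from gklearn.utils.graph_synthesizer import GraphSynthesizer
from gklearn.ged.util import compute_geds

# Synthesize unlabeled, undirected graphs, as in generate_graphs() below.
graphs = GraphSynthesizer().unified_graphs(
	num_graphs=10, num_nodes=20, num_edges=20,
	num_node_labels=0, num_edge_labels=0, seed=None, directed=False)

ratio = 10  # node-cost / edge-cost ratio under test
options = {
	'method': 'IPFP',
	'initialization_method': 'RANDOM',
	'initial_solutions': 1,
	'ratio_runs_from_initial_solutions': 1,
	'edit_cost': 'CONSTANT',
	'attr_distance': 'euclidean',
	'threads': multiprocessing.cpu_count(),
	'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES',
	# Node edit costs scaled by the ratio, edge costs left at 1 (presumed CONSTANT cost order).
	'edit_cost_constants': [ratio, ratio, ratio, 1, 1, 1],
	'node_labels': [], 'edge_labels': [],
	'node_attrs': [], 'edge_attrs': [],
}

time0 = time.time()
ged_vec, ged_mat, n_edit_ops = compute_geds(
	graphs, options=options, repeats=1, parallel=True, verbose=False)
print('runtime:', time.time() - time0)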

+142 -0  gklearn/experiments/ged/stability/edit_costs.max_num_sols.N.bipartite.py

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of graphs (N) and the maximum number of solutions on the stability of GED computation with the BIPARTITE heuristic, under several ratios between node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys
from group_results import group_trials


def generate_graphs():
from gklearn.utils.graph_synthesizer import GraphSynthesizer
gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
return graphs


def xp_compute_ged_matrix(graphs, N, max_num_solutions, ratio, trial):

save_file_suffix = '.' + str(N) + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

# Parameters for GED computation.
ged_options = {'method': 'BIPARTITE', # use BIPARTITE heuristic.
# 'initialization_method': 'RANDOM', # or 'NODE', etc. (for GEDEnv)
'lsape_model': 'ECBP', #
# (to verify) when bigger than 1, the method is considered mIPFP.
# the actual number of computed solutions might be smaller than the specified value
'max_num_solutions': max_num_solutions,
'edit_cost': 'CONSTANT', # use CONSTANT cost.
'greedy_method': 'BASIC', #
# the distance between non-symbolic node/edge labels is computed by euclidean distance.
'attr_distance': 'euclidean',
'optimal': True, # if TRUE, the option --greedy-method has no effect
# parallel threads; has no effect if parallel = False.
'threads': multiprocessing.cpu_count(),
'centrality_method': 'NONE',
'centrality_weight': 0.7,
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
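# Presumed layout of the 'CONSTANT' cost vector: [node_ins, node_del, node_sub, edge_ins, edge_del, edge_sub];
# the three node costs are scaled by the tested ratio while the edge costs stay at 1.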
edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
# edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

options = ged_options.copy()
options['edit_cost_constants'] = edit_cost_constants
options['node_labels'] = []
options['edge_labels'] = []
options['node_attrs'] = []
options['edge_attrs'] = []
parallel = True # if num_solutions == 1 else False
"""**5. Compute GED matrix.**"""
ged_mat = 'error'
runtime = 0
try:
time0 = time.time()
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=1, parallel=parallel, verbose=True)
runtime = time.time() - time0
except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception(save_file_suffix)
print(repr(exp))
"""**6. Get results.**"""
with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(ged_mat, f)
with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(runtime, f)

return ged_mat, runtime

def save_trials_as_group(graphs, N, max_num_solutions, ratio):
# Return if the group file exists.
name_middle = '.' + str(N) + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
print()
print('Trial:', trial)
ged_mat, runtime = xp_compute_ged_matrix(graphs, N, max_num_solutions, ratio, trial)
ged_mats.append(ged_mat)
runtimes.append(runtime)
# Group trials and Remove single files.
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
for N in N_list:
print()
print('# of graphs:', N)
for max_num_solutions in [1, 20, 40, 60, 80, 100]:
print()
print('Max # of solutions:', max_num_solutions)
save_trials_as_group(graphs[:N], N, max_num_solutions, ratio)

if __name__ == '__main__':
if len(sys.argv) > 1:
N_list = [int(i) for i in sys.argv[1:]]
else:
N_list = [10, 50, 100]
# Generate graphs.
graphs = generate_graphs()
save_dir = 'outputs/edit_costs.max_num_sols.N.bipartite/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir + 'groups/', exist_ok=True)
for ratio in [10, 1, 0.1]:
print()
print('Ratio:', ratio)
results_for_a_ratio(ratio)

+25 -7  gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py

@@ -22,8 +22,9 @@ def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial):
save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)
# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

@@ -83,6 +84,12 @@ def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial):

def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio):
# Return if the group file exists.
name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
@@ -93,25 +100,35 @@ def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio):
runtimes.append(runtime)
# Group trials and Remove single files.
name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
def results_for_a_dataset(ds_name):
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)
for max_num_solutions in [1, 20, 40, 60, 80, 100]:
for max_num_solutions in mnum_solutions_list:
print()
print('Max # of solutions:', max_num_solutions)
for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
for ratio in ratio_list:
print()
print('Ratio:', ratio)
save_trials_as_group(dataset, ds_name, max_num_solutions, ratio)
def get_param_lists(ds_name):
if ds_name == 'AIDS_symb':
mnum_solutions_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
else:
mnum_solutions_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
return mnum_solutions_list, ratio_list
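# Both branches currently return identical lists; the split presumably leaves room for dataset-specific settings later.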

if __name__ == '__main__':
if len(sys.argv) > 1:
@@ -126,4 +143,5 @@ if __name__ == '__main__':
for ds_name in ds_name_list:
print()
print('Dataset:', ds_name)
mnum_solutions_list, ratio_list = get_param_lists(ds_name)
results_for_a_dataset(ds_name)

+137 -0  gklearn/experiments/ged/stability/edit_costs.nums_sols.N.IPFP.py

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of graphs (N) and the number of solutions on the stability of GED computation with the IPFP heuristic, under several ratios between node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys
from group_results import group_trials


def generate_graphs():
from gklearn.utils.graph_synthesizer import GraphSynthesizer
gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
return graphs


def xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial):

save_file_suffix = '.' + str(N) + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

# Parameters for GED computation.
ged_options = {'method': 'IPFP', # use IPFP heuristic.
'initialization_method': 'RANDOM', # or 'NODE', etc.
# when bigger than 1, then the method is considered mIPFP.
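# With 'ratio_runs_from_initial_solutions' set to 0.25 below, 4 * num_solutions initial solutions
# presumably amount to about num_solutions IPFP runs per graph pair.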
'initial_solutions': int(num_solutions * 4),
'edit_cost': 'CONSTANT', # use CONSTANT cost.
# the distance between non-symbolic node/edge labels is computed by euclidean distance.
'attr_distance': 'euclidean',
'ratio_runs_from_initial_solutions': 0.25,
# parallel threads; has no effect if parallel = False.
'threads': multiprocessing.cpu_count(),
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
# edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

options = ged_options.copy()
options['edit_cost_constants'] = edit_cost_constants
options['node_labels'] = []
options['edge_labels'] = []
options['node_attrs'] = []
options['edge_attrs'] = []
parallel = True # if num_solutions == 1 else False
"""**5. Compute GED matrix.**"""
ged_mat = 'error'
runtime = 0
try:
time0 = time.time()
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=1, parallel=parallel, verbose=True)
runtime = time.time() - time0
except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception(save_file_suffix)
print(repr(exp))
"""**6. Get results.**"""
with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(ged_mat, f)
with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(runtime, f)

return ged_mat, runtime

def save_trials_as_group(graphs, N, num_solutions, ratio):
# Return if the group file exists.
name_middle = '.' + str(N) + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
print()
print('Trial:', trial)
ged_mat, runtime = xp_compute_ged_matrix(graphs, N, num_solutions, ratio, trial)
ged_mats.append(ged_mat)
runtimes.append(runtime)
# Group trials and Remove single files.
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
for N in N_list:
print()
print('# of graphs:', N)
for num_solutions in [1, 20, 40, 60, 80, 100]:
print()
print('# of solutions:', num_solutions)
save_trials_as_group(graphs[:N], N, num_solutions, ratio)

if __name__ == '__main__':
if len(sys.argv) > 1:
N_list = [int(i) for i in sys.argv[1:]]
else:
N_list = [10, 50, 100]
# Generate graphs.
graphs = generate_graphs()
save_dir = 'outputs/edit_costs.num_sols.N.IPFP/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir + 'groups/', exist_ok=True)
for ratio in [10, 1, 0.1]:
print()
print('Ratio:', ratio)
results_for_a_ratio(ratio)

+5 -5  gklearn/experiments/ged/stability/edit_costs.nums_sols.ratios.IPFP.py

@@ -59,7 +59,7 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):
runtime = 0
try:
time0 = time.time()
ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, options=options, parallel=parallel, verbose=True)
ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, options=options, repeats=1, parallel=parallel, verbose=True)
runtime = time.time() - time0
except Exception as exp:
print('An exception occurred when running this experiment:')
@@ -74,9 +74,9 @@ def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial):
pickle.dump(ged_mat, f)
with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(runtime, f)
return ged_mat, runtime
def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
# Return if the group file exists.
@@ -99,8 +99,8 @@ def save_trials_as_group(dataset, ds_name, num_solutions, ratio):
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
def results_for_a_dataset(ds_name):
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)


+137 -0  gklearn/experiments/ged/stability/edit_costs.repeats.N.IPFP.py

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of graphs (N) and the number of repeats on the stability of GED computation with the IPFP heuristic, under several ratios between node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys
from group_results import group_trials


def generate_graphs():
from gklearn.utils.graph_synthesizer import GraphSynthesizer
gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
return graphs


def xp_compute_ged_matrix(graphs, N, repeats, ratio, trial):

save_file_suffix = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

# Parameters for GED computation.
ged_options = {'method': 'IPFP', # use IPFP heuristic.
'initialization_method': 'RANDOM', # or 'NODE', etc.
# when bigger than 1, then the method is considered mIPFP.
'initial_solutions': 1,
'edit_cost': 'CONSTANT', # use CONSTANT cost.
# the distance between non-symbolic node/edge labels is computed by euclidean distance.
'attr_distance': 'euclidean',
'ratio_runs_from_initial_solutions': 1,
# parallel threads; has no effect if parallel = False.
'threads': multiprocessing.cpu_count(),
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
# edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

options = ged_options.copy()
options['edit_cost_constants'] = edit_cost_constants
options['node_labels'] = []
options['edge_labels'] = []
options['node_attrs'] = []
options['edge_attrs'] = []
parallel = True # if num_solutions == 1 else False
"""**5. Compute GED matrix.**"""
ged_mat = 'error'
runtime = 0
try:
time0 = time.time()
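# 'repeats' is the parameter under study here: each pairwise GED is presumably recomputed
# 'repeats' times and the best result kept.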
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=repeats, parallel=parallel, verbose=True)
runtime = time.time() - time0
except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception(save_file_suffix)
print(repr(exp))
"""**6. Get results.**"""
with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(ged_mat, f)
with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(runtime, f)

return ged_mat, runtime

def save_trials_as_group(graphs, N, repeats, ratio):
# Return if the group file exists.
name_middle = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
print()
print('Trial:', trial)
ged_mat, runtime = xp_compute_ged_matrix(graphs, N, repeats, ratio, trial)
ged_mats.append(ged_mat)
runtimes.append(runtime)
# Group trials and Remove single files.
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
for N in N_list:
print()
print('# of graphs:', N)
for repeats in [1, 20, 40, 60, 80, 100]:
print()
print('Repeats:', repeats)
save_trials_as_group(graphs[:N], N, repeats, ratio)

if __name__ == '__main__':
if len(sys.argv) > 1:
N_list = [int(i) for i in sys.argv[1:]]
else:
N_list = [10, 50, 100]
# Generate graphs.
graphs = generate_graphs()
save_dir = 'outputs/edit_costs.repeats.N.IPFP/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir + 'groups/', exist_ok=True)
for ratio in [10, 1, 0.1]:
print()
print('Ratio:', ratio)
results_for_a_ratio(ratio)

+142 -0  gklearn/experiments/ged/stability/edit_costs.repeats.N.bipartite.py

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 20 11:48:02 2020

@author: ljia
"""
# This script tests the influence of the number of graphs (N) and the number of repeats on the stability of GED computation with the BIPARTITE heuristic, under several ratios between node and edge edit costs; the base edit costs are [1, 1, 1, 1, 1, 1].

import os
import multiprocessing
import pickle
import logging
from gklearn.ged.util import compute_geds
import time
import sys
from group_results import group_trials


def generate_graphs():
from gklearn.utils.graph_synthesizer import GraphSynthesizer
gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=20, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
return graphs


def xp_compute_ged_matrix(graphs, N, repeats, ratio, trial):

save_file_suffix = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)

# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

# Parameters for GED computation.
ged_options = {'method': 'BIPARTITE', # use BIPARTITE heuristic.
# 'initialization_method': 'RANDOM', # or 'NODE', etc. (for GEDEnv)
'lsape_model': 'ECBP', #
# (to verify) when bigger than 1, the method is considered mIPFP.
# the actual number of computed solutions might be smaller than the specified value
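# Fixed to 1 in this experiment; the varied parameter is 'repeats', passed to compute_geds below.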
'max_num_solutions': 1,
'edit_cost': 'CONSTANT', # use CONSTANT cost.
'greedy_method': 'BASIC', #
# the distance between non-symbolic node/edge labels is computed by euclidean distance.
'attr_distance': 'euclidean',
'optimal': True, # if TRUE, the option --greedy-method has no effect
# parallel threads; has no effect if parallel = False.
'threads': multiprocessing.cpu_count(),
'centrality_method': 'NONE',
'centrality_weight': 0.7,
'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'
}
edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1]
# edit_cost_constants = [item * 0.01 for item in edit_cost_constants]
# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb"))

options = ged_options.copy()
options['edit_cost_constants'] = edit_cost_constants
options['node_labels'] = []
options['edge_labels'] = []
options['node_attrs'] = []
options['edge_attrs'] = []
parallel = True # if num_solutions == 1 else False
"""**5. Compute GED matrix.**"""
ged_mat = 'error'
runtime = 0
try:
time0 = time.time()
ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, repeats=repeats, parallel=parallel, verbose=True)
runtime = time.time() - time0
except Exception as exp:
print('An exception occurred when running this experiment:')
LOG_FILENAME = save_dir + 'error.txt'
logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
logging.exception(save_file_suffix)
print(repr(exp))
"""**6. Get results.**"""
with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(ged_mat, f)
with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f:
pickle.dump(runtime, f)

return ged_mat, runtime

def save_trials_as_group(graphs, N, repeats, ratio):
# Return if the group file exists.
name_middle = '.' + str(N) + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
print()
print('Trial:', trial)
ged_mat, runtime = xp_compute_ged_matrix(graphs, N, repeats, ratio, trial)
ged_mats.append(ged_mat)
runtimes.append(runtime)
# Group trials and Remove single files.
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)


def results_for_a_ratio(ratio):
for N in N_list:
print()
print('# of graphs:', N)
for repeats in [1, 20, 40, 60, 80, 100]:
print()
print('Repeats:', repeats)
save_trials_as_group(graphs[:N], N, repeats, ratio)

if __name__ == '__main__':
if len(sys.argv) > 1:
N_list = [int(i) for i in sys.argv[1:]]
else:
N_list = [10, 50, 100]
# Generate graphs.
graphs = generate_graphs()
save_dir = 'outputs/edit_costs.repeats.N.bipartite/'
os.makedirs(save_dir, exist_ok=True)
os.makedirs(save_dir + 'groups/', exist_ok=True)
for ratio in [10, 1, 0.1]:
print()
print('Ratio:', ratio)
results_for_a_ratio(ratio)

+26 -8  gklearn/experiments/ged/stability/edit_costs.repeats.ratios.IPFP.py

@@ -19,11 +19,12 @@ from group_results import group_trials


def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):
save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)
# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

@@ -78,6 +79,12 @@ def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):

def save_trials_as_group(dataset, ds_name, repeats, ratio):
# Return if the group file exists.
name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
@@ -88,25 +95,35 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio):
runtimes.append(runtime)
# Group trials and Remove single files.
name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
def results_for_a_dataset(ds_name):
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)
for repeats in [1, 20, 40, 60, 80, 100]:
for repeats in repeats_list:
print()
print('Repeats:', repeats)
for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
for ratio in ratio_list:
print()
print('Ratio:', ratio)
save_trials_as_group(dataset, ds_name, repeats, ratio)
def get_param_lists(ds_name):
if ds_name == 'AIDS_symb':
repeats_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
else:
repeats_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
return repeats_list, ratio_list

if __name__ == '__main__':
if len(sys.argv) > 1:
@@ -121,4 +138,5 @@ if __name__ == '__main__':
for ds_name in ds_name_list:
print()
print('Dataset:', ds_name)
repeats_list, ratio_list = get_param_lists(ds_name)
results_for_a_dataset(ds_name)

+27 -9  gklearn/experiments/ged/stability/edit_costs.repeats.ratios.bipartite.py

@@ -19,11 +19,12 @@ from group_results import group_trials


def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):
save_file_suffix = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial)
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)

# Return if the file exists.
if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'):
return None, None

"""**2. Set parameters.**"""

@@ -83,6 +84,12 @@ def xp_compute_ged_matrix(dataset, ds_name, repeats, ratio, trial):

def save_trials_as_group(dataset, ds_name, repeats, ratio):
# Return if the group file exists.
name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy'
if os.path.isfile(name_group):
return
ged_mats = []
runtimes = []
for trial in range(1, 101):
@@ -93,25 +100,35 @@ def save_trials_as_group(dataset, ds_name, repeats, ratio):
runtimes.append(runtime)
# Group trials and Remove single files.
name_middle = '.' + ds_name + '.repeats_' + str(repeats) + '.ratio_' + "{:.2f}".format(ratio) + '.'
name_prefix = 'ged_matrix' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
name_prefix = 'runtime' + name_middle
group_trials(save_dir, name_prefix, True, True, False)
def results_for_a_dataset(ds_name):
"""**1. Get dataset.**"""
dataset = get_dataset(ds_name)
for repeats in [1, 20, 40, 60, 80, 100]:
for repeats in repeats_list:
print()
print('Repeats:', repeats)
for ratio in [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]:
for ratio in ratio_list:
print()
print('Ratio:', ratio)
save_trials_as_group(dataset, ds_name, repeats, ratio)
def get_param_lists(ds_name):
if ds_name == 'AIDS_symb':
repeats_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
else:
repeats_list = [1, 20, 40, 60, 80, 100]
ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9]
return repeats_list, ratio_list

if __name__ == '__main__':
if len(sys.argv) > 1:
@@ -126,4 +143,5 @@ if __name__ == '__main__':
for ds_name in ds_name_list:
print()
print('Dataset:', ds_name)
repeats_list, ratio_list = get_param_lists(ds_name)
results_for_a_dataset(ds_name)

+56 -0  gklearn/experiments/ged/stability/run_job_edit_costs.N.py

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg, params):
ged_method = params[0]
multi_method = params[1]
job_name_label = r"rep." if multi_method == 'repeats' else r""
script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.""" + job_name_label + r"N" + arg + r"." + ged_method + r""""
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.""" + multi_method + r".N." + ged_method + r"." + arg + r""".txt"
#SBATCH --error="errors/error_edit_costs.""" + multi_method + r".N." + ged_method + r"." + arg + r""".txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.""" + multi_method + r".N." + ged_method + r".py " + arg
script = script.strip()
script = re.sub('\n\t+', '\n', script)
script = re.sub('\n +', '\n', script)
return script

if __name__ == '__main__':
params_list = [('IPFP', 'nums_sols'),
('IPFP', 'repeats'),
('bipartite', 'max_num_sols'),
('bipartite', 'repeats')]
N_list = [10, 50, 100]
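# params_list[1:] skips the ('IPFP', 'nums_sols') entry, so only the remaining three experiment types are submitted by this run.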
for params in params_list[1:]:
for N in [N_list[i] for i in [0, 1, 2]]:
job_script = get_job_script(str(N), params)
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# print(command)
os.system(command)
# os.popen(command)
# output = stream.readlines()

+47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.max_nums_sols.ratios.bipartite.py

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg):
script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.""" + arg + r""".bp"
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.max_num_sols.ratios.bipartite.""" + arg + """.txt"
#SBATCH --error="errors/error_edit_costs.max_num_sols.ratios.bipartite.""" + arg + """.txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.max_nums_sols.ratios.bipartite.py """ + arg
script = script.strip()
script = re.sub('\n\t+', '\n', script)
script = re.sub('\n +', '\n', script)
return script

if __name__ == '__main__':
ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
for ds_name in [ds_list[i] for i in [0, 1, 2, 3]]:
job_script = get_job_script(ds_name)
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
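# The generated script is fed to sbatch through a here-document, so no temporary script file is written.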
# print(command)
os.system(command)
# os.popen(command)
# output = stream.readlines()

+47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.repeats.ratios.IPFP.py

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg):
script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.rep.""" + arg + r""".IPFP"
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.repeats.ratios.IPFP.""" + arg + """.txt"
#SBATCH --error="errors/error_edit_costs.repeats.ratios.IPFP.""" + arg + """.txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.repeats.ratios.IPFP.py """ + arg
script = script.strip()
script = re.sub('\n\t+', '\n', script)
script = re.sub('\n +', '\n', script)
return script

if __name__ == '__main__':
ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
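# Indices [0, 3] select only MAO and AIDS_symb for this submission.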
for ds_name in [ds_list[i] for i in [0, 3]]:
job_script = get_job_script(ds_name)
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# print(command)
os.system(command)
# os.popen(command)
# output = stream.readlines()

+47 -0  gklearn/experiments/ged/stability/run_job_edit_costs.repeats.ratios.bipartite.py

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 20:23:25 2020

@author: ljia
"""
import os
import re


def get_job_script(arg):
script = r"""
#!/bin/bash

#SBATCH --exclusive
#SBATCH --job-name="st.rep.""" + arg + r""".bp"
#SBATCH --partition=tlong
#SBATCH --mail-type=ALL
#SBATCH --mail-user=jajupmochi@gmail.com
#SBATCH --output="outputs/output_edit_costs.repeats.ratios.bipartite.""" + arg + """.txt"
#SBATCH --error="errors/error_edit_costs.repeats.ratios.bipartite.""" + arg + """.txt"
#
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --time=300:00:00
#SBATCH --mem-per-cpu=4000

srun hostname
srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability
srun python3 edit_costs.repeats.ratios.bipartite.py """ + arg
script = script.strip()
script = re.sub('\n\t+', '\n', script)
script = re.sub('\n +', '\n', script)
return script

if __name__ == '__main__':
ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb']
for ds_name in [ds_list[i] for i in [0, 1, 2, 3]]:
job_script = get_job_script(ds_name)
command = 'sbatch <<EOF\n' + job_script + '\nEOF'
# print(command)
os.system(command)
# os.popen(command)
# output = stream.readlines()
