From 56bc9a8131298859b473107ff320401878240070 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 10 Apr 2020 16:11:20 +0200 Subject: [PATCH] Simplify function gklearn.utils.graph_files.load_from_ds(). --- gklearn/preimage/experiments/xp_median_preimage.py | 69 +++++++++++++++++++++- gklearn/utils/dataset.py | 3 +- gklearn/utils/graph_files.py | 62 ++++++++----------- 3 files changed, 95 insertions(+), 39 deletions(-) diff --git a/gklearn/preimage/experiments/xp_median_preimage.py b/gklearn/preimage/experiments/xp_median_preimage.py index d23a0c8..aed85cc 100644 --- a/gklearn/preimage/experiments/xp_median_preimage.py +++ b/gklearn/preimage/experiments/xp_median_preimage.py @@ -12,6 +12,70 @@ from gklearn.preimage.utils import generate_median_preimages_by_class from gklearn.utils import compute_gram_matrices_by_class +def xp_median_preimage_9_1(): + """xp 9_1: Acyclic, sspkernel, using CONSTANT. + """ + # set parameters. + ds_name = 'Acyclic' # + mpg_options = {'fit_method': 'k-graphs', + 'init_ecc': [4, 4, 2, 1, 1, 1], # + 'ds_name': ds_name, + 'parallel': True, # False + 'time_limit_in_sec': 0, + 'max_itrs': 100, # + 'max_itrs_without_update': 3, + 'epsilon_residual': 0.01, + 'epsilon_ec': 0.1, + 'verbose': 2} + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) + sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} + kernel_options = {'name': 'structuralspkernel', + 'edge_weight': None, + 'node_kernels': sub_kernels, + 'edge_kernels': sub_kernels, + 'compute_method': 'naive', + 'parallel': 'imap_unordered', + # 'parallel': None, + 'n_jobs': multiprocessing.cpu_count(), + 'normalize': True, + 'verbose': 2} + ged_options = {'method': 'IPFP', + 'initialization_method': 'RANDOM', # 'NODE' + 'initial_solutions': 10, # 1 + 'edit_cost': 'CONSTANT', # + 'attr_distance': 'euclidean', + 'ratio_runs_from_initial_solutions': 1, + 'threads': multiprocessing.cpu_count(), + 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'} + 
mge_options = {'init_type': 'MEDOID', + 'random_inits': 10, + 'time_limit': 600, + 'verbose': 2, + 'refine': False} + save_results = True + dir_save='../results/xp_median_preimage/' + irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} # + edge_required = False # + + # print settings. + print('parameters:') + print('dataset name:', ds_name) + print('mpg_options:', mpg_options) + print('kernel_options:', kernel_options) + print('ged_options:', ged_options) + print('mge_options:', mge_options) + print('save_results:', save_results) + print('irrelevant_labels:', irrelevant_labels) + print() + + # generate preimages. + for fit_method in ['k-graphs', 'expert', 'random', 'random', 'random']: + print('\n-------------------------------------') + print('fit method:', fit_method, '\n') + mpg_options['fit_method'] = fit_method + generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required) + + def xp_median_preimage_8_1(): """xp 8_1: Monoterpenoides, sspkernel, using CONSTANT. """ @@ -546,4 +610,7 @@ if __name__ == "__main__": # xp_median_preimage_7_1() #### xp 8_1: Monoterpenoides, sspkernel, using CONSTANT. - xp_median_preimage_8_1() \ No newline at end of file +# xp_median_preimage_8_1() + + #### xp 9_1: Acyclic, sspkernel, using CONSTANT. 
+ xp_median_preimage_9_1() \ No newline at end of file diff --git a/gklearn/utils/dataset.py b/gklearn/utils/dataset.py index ed84725..9249c79 100644 --- a/gklearn/utils/dataset.py +++ b/gklearn/utils/dataset.py @@ -68,7 +68,8 @@ class Dataset(object): def load_predefined_dataset(self, ds_name): current_path = os.path.dirname(os.path.realpath(__file__)) + '/' if ds_name == 'Acyclic': - pass + ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds' + self.__graphs, self.__targets, label_names = load_dataset(ds_file) elif ds_name == 'COIL-DEL': ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt' self.__graphs, self.__targets, label_names = load_dataset(ds_file) diff --git a/gklearn/utils/graph_files.py b/gklearn/utils/graph_files.py index fafecf6..f20d097 100644 --- a/gklearn/utils/graph_files.py +++ b/gklearn/utils/graph_files.py @@ -720,38 +720,26 @@ def load_from_ds(filename, filename_targets): label_names = {'node_labels': [], 'edge_labels': [], 'node_attrs': [], 'edge_attrs': []} content = open(filename).read().splitlines() extension = splitext(content[0].split(' ')[0])[1][1:] + if extension == 'ct': + load_file_fun = load_ct + elif extension == 'gxl': + load_file_fun = load_gxl + if filename_targets is None or filename_targets == '': - if extension == 'ct': - for i in range(0, len(content)): - tmp = content[i].split(' ') - # remove the '#'s in file names - g, l_names = load_ct(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) - data.append(g) - __append_label_names(label_names, l_names) - y.append(float(tmp[1])) - elif extension == 'gxl': - for i in range(0, len(content)): - tmp = content[i].split(' ') - # remove the '#'s in file names - g, l_names = load_gxl(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) - data.append(g) - __append_label_names(label_names, l_names) - y.append(float(tmp[1])) - else: # y in a seperate file - if extension == 'ct': - for i in range(0, len(content)): - tmp = content[i] - # remove the '#'s in file names 
- g, l_names = load_ct(dirname_dataset + '/' + tmp.replace('#', '', 1)) - data.append(g) - __append_label_names(label_names, l_names) - elif extension == 'gxl': - for i in range(0, len(content)): - tmp = content[i] - # remove the '#'s in file names - g, l_names = load_gxl(dirname_dataset + '/' + tmp.replace('#', '', 1)) - data.append(g) - __append_label_names(label_names, l_names) + for i in range(0, len(content)): + tmp = content[i].split(' ') + # remove the '#'s in file names + g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) + data.append(g) + __append_label_names(label_names, l_names) + y.append(float(tmp[1])) + else: # targets in a separate file + for i in range(0, len(content)): + tmp = content[i] + # remove the '#'s in file names + g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1)) + data.append(g) + __append_label_names(label_names, l_names) content_y = open(filename_targets).read().splitlines() # assume entries in filename and filename_targets have the same order. 
@@ -774,16 +762,16 @@ if __name__ == '__main__': # ds = {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', # 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt'} # Gn, y = loadDataset(ds['dataset'], filename_y=ds['dataset_y']) - ds_file = '../../datasets/acyclic/dataset_bps.ds' # node symb - Gn, targets, label_names = load_dataset(ds_file) +# ds_file = '../../datasets/Acyclic/dataset_bps.ds' # node symb +# Gn, targets, label_names = load_dataset(ds_file) ## ds = {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds'} # node/edge symb ## Gn, y = loadDataset(ds['dataset']) ## ds = {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds'} # unlabeled ## Gn, y = loadDataset(ds['dataset']) - print(Gn[1].graph) - print(Gn[1].nodes(data=True)) - print(Gn[1].edges(data=True)) - print(targets[1]) +# print(Gn[1].graph) +# print(Gn[1].nodes(data=True)) +# print(Gn[1].edges(data=True)) +# print(targets[1]) # # .gxl file. # ds_file = '../../datasets/monoterpenoides/dataset_10+.ds' # node/edge symb