@@ -12,6 +12,70 @@ from gklearn.preimage.utils import generate_median_preimages_by_class | |||
from gklearn.utils import compute_gram_matrices_by_class | |||
def xp_median_preimage_9_1():
    """xp 9_1: Acyclic, sspkernel, using CONSTANT.

    Generate median preimages for each class of the Acyclic dataset with
    the structural shortest-path kernel and the CONSTANT GED edit cost.
    The experiment is run once for the 'k-graphs' and 'expert' fit
    methods and three times for 'random' (independent random inits).
    Results, medians and plots are written under ``dir_save``.
    """
    # set parameters.
    ds_name = 'Acyclic'
    mpg_options = {'fit_method': 'k-graphs',  # overwritten per run in the loop below.
                   'init_ecc': [4, 4, 2, 1, 1, 1],
                   'ds_name': ds_name,
                   'parallel': True,  # False
                   'time_limit_in_sec': 0,
                   'max_itrs': 100,
                   'max_itrs_without_update': 3,
                   'epsilon_residual': 0.01,
                   'epsilon_ec': 0.1,
                   'verbose': 2}
    # delta kernel for symbolic labels, gaussian for non-symbolic,
    # and their product for mixed labels.
    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
    kernel_options = {'name': 'structuralspkernel',
                      'edge_weight': None,
                      'node_kernels': sub_kernels,
                      'edge_kernels': sub_kernels,
                      'compute_method': 'naive',
                      'parallel': 'imap_unordered',
                      # 'parallel': None,
                      'n_jobs': multiprocessing.cpu_count(),
                      'normalize': True,
                      'verbose': 2}
    ged_options = {'method': 'IPFP',
                   'initialization_method': 'RANDOM',  # 'NODE'
                   'initial_solutions': 10,  # 1
                   'edit_cost': 'CONSTANT',
                   'attr_distance': 'euclidean',
                   'ratio_runs_from_initial_solutions': 1,
                   'threads': multiprocessing.cpu_count(),
                   'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'}
    mge_options = {'init_type': 'MEDOID',
                   'random_inits': 10,
                   'time_limit': 600,
                   'verbose': 2,
                   'refine': False}
    save_results = True
    dir_save = '../results/xp_median_preimage/'
    # labels of the dataset that this kernel does not use; dropped
    # before computation.
    irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
    edge_required = False

    # print settings.
    print('parameters:')
    print('dataset name:', ds_name)
    print('mpg_options:', mpg_options)
    print('kernel_options:', kernel_options)
    print('ged_options:', ged_options)
    print('mge_options:', mge_options)
    print('save_results:', save_results)
    print('irrelevant_labels:', irrelevant_labels)
    # dir_save and edge_required were previously missing from this dump.
    print('dir_save:', dir_save)
    print('edge_required:', edge_required)
    print()

    # generate preimages.
    # 'random' appears three times deliberately: three independent runs
    # with different random initializations.
    for fit_method in ['k-graphs', 'expert', 'random', 'random', 'random']:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)
def xp_median_preimage_8_1(): | |||
"""xp 8_1: Monoterpenoides, sspkernel, using CONSTANT. | |||
""" | |||
@@ -546,4 +610,7 @@ if __name__ == "__main__": | |||
# xp_median_preimage_7_1() | |||
#### xp 8_1: Monoterpenoides, sspkernel, using CONSTANT. | |||
xp_median_preimage_8_1() | |||
# xp_median_preimage_8_1() | |||
#### xp 9_1: Acyclic, sspkernel, using CONSTANT. | |||
xp_median_preimage_9_1() |
@@ -68,7 +68,8 @@ class Dataset(object): | |||
def load_predefined_dataset(self, ds_name): | |||
current_path = os.path.dirname(os.path.realpath(__file__)) + '/' | |||
if ds_name == 'Acyclic': | |||
pass | |||
ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'COIL-DEL': | |||
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
@@ -720,38 +720,26 @@ def load_from_ds(filename, filename_targets): | |||
label_names = {'node_labels': [], 'edge_labels': [], 'node_attrs': [], 'edge_attrs': []} | |||
content = open(filename).read().splitlines() | |||
extension = splitext(content[0].split(' ')[0])[1][1:] | |||
if extension == 'ct': | |||
load_file_fun = load_ct | |||
elif extension == 'gxl': | |||
load_file_fun = load_gxl | |||
if filename_targets is None or filename_targets == '': | |||
if extension == 'ct': | |||
for i in range(0, len(content)): | |||
tmp = content[i].split(' ') | |||
# remove the '#'s in file names | |||
g, l_names = load_ct(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) | |||
data.append(g) | |||
__append_label_names(label_names, l_names) | |||
y.append(float(tmp[1])) | |||
elif extension == 'gxl': | |||
for i in range(0, len(content)): | |||
tmp = content[i].split(' ') | |||
# remove the '#'s in file names | |||
g, l_names = load_gxl(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) | |||
data.append(g) | |||
__append_label_names(label_names, l_names) | |||
y.append(float(tmp[1])) | |||
else: # y in a seperate file | |||
if extension == 'ct': | |||
for i in range(0, len(content)): | |||
tmp = content[i] | |||
# remove the '#'s in file names | |||
g, l_names = load_ct(dirname_dataset + '/' + tmp.replace('#', '', 1)) | |||
data.append(g) | |||
__append_label_names(label_names, l_names) | |||
elif extension == 'gxl': | |||
for i in range(0, len(content)): | |||
tmp = content[i] | |||
# remove the '#'s in file names | |||
g, l_names = load_gxl(dirname_dataset + '/' + tmp.replace('#', '', 1)) | |||
data.append(g) | |||
__append_label_names(label_names, l_names) | |||
for i in range(0, len(content)): | |||
tmp = content[i].split(' ') | |||
# remove the '#'s in file names | |||
g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) | |||
data.append(g) | |||
__append_label_names(label_names, l_names) | |||
y.append(float(tmp[1])) | |||
else: # targets in a seperate file | |||
for i in range(0, len(content)): | |||
tmp = content[i] | |||
# remove the '#'s in file names | |||
g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1)) | |||
data.append(g) | |||
__append_label_names(label_names, l_names) | |||
content_y = open(filename_targets).read().splitlines() | |||
# assume entries in filename and filename_targets have the same order. | |||
@@ -774,16 +762,16 @@ if __name__ == '__main__': | |||
# ds = {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds', | |||
# 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt'} | |||
# Gn, y = loadDataset(ds['dataset'], filename_y=ds['dataset_y']) | |||
ds_file = '../../datasets/acyclic/dataset_bps.ds' # node symb | |||
Gn, targets, label_names = load_dataset(ds_file) | |||
# ds_file = '../../datasets/Acyclic/dataset_bps.ds' # node symb | |||
# Gn, targets, label_names = load_dataset(ds_file) | |||
## ds = {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds'} # node/edge symb | |||
## Gn, y = loadDataset(ds['dataset']) | |||
## ds = {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds'} # unlabeled | |||
## Gn, y = loadDataset(ds['dataset']) | |||
print(Gn[1].graph) | |||
print(Gn[1].nodes(data=True)) | |||
print(Gn[1].edges(data=True)) | |||
print(targets[1]) | |||
# print(Gn[1].graph) | |||
# print(Gn[1].nodes(data=True)) | |||
# print(Gn[1].edges(data=True)) | |||
# print(targets[1]) | |||
# # .gxl file. | |||
# ds_file = '../../datasets/monoterpenoides/dataset_10+.ds' # node/edge symb | |||