Browse Source

Simplify function gklearn.utils.graph_files.load_from_ds().

v0.2.x
jajupmochi 5 years ago
parent
commit
56bc9a8131
3 changed files with 95 additions and 39 deletions
  1. +68
    -1
      gklearn/preimage/experiments/xp_median_preimage.py
  2. +2
    -1
      gklearn/utils/dataset.py
  3. +25
    -37
      gklearn/utils/graph_files.py

+ 68
- 1
gklearn/preimage/experiments/xp_median_preimage.py View File

@@ -12,6 +12,70 @@ from gklearn.preimage.utils import generate_median_preimages_by_class
from gklearn.utils import compute_gram_matrices_by_class


def xp_median_preimage_9_1():
    """xp 9_1: Acyclic, sspkernel, using CONSTANT.
    """
    # ---- experiment configuration ----
    ds_name = 'Acyclic' #
    mpg_options = dict(
        fit_method='k-graphs',
        init_ecc=[4, 4, 2, 1, 1, 1], #
        ds_name=ds_name,
        parallel=True,  # False
        time_limit_in_sec=0,
        max_itrs=100, #
        max_itrs_without_update=3,
        epsilon_residual=0.01,
        epsilon_ec=0.1,
        verbose=2,
    )
    mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    sub_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mixkernel)
    kernel_options = dict(
        name='structuralspkernel',
        edge_weight=None,
        node_kernels=sub_kernels,
        edge_kernels=sub_kernels,
        compute_method='naive',
        parallel='imap_unordered',
        # parallel=None,
        n_jobs=multiprocessing.cpu_count(),
        normalize=True,
        verbose=2,
    )
    ged_options = dict(
        method='IPFP',
        initialization_method='RANDOM',  # 'NODE'
        initial_solutions=10,  # 1
        edit_cost='CONSTANT', #
        attr_distance='euclidean',
        ratio_runs_from_initial_solutions=1,
        threads=multiprocessing.cpu_count(),
        init_option='EAGER_WITHOUT_SHUFFLED_COPIES',
    )
    mge_options = dict(
        init_type='MEDOID',
        random_inits=10,
        time_limit=600,
        verbose=2,
        refine=False,
    )
    save_results = True
    dir_save = '../results/xp_median_preimage/'
    irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']} #
    edge_required = False #

    # ---- echo settings so each run is self-documenting in the log ----
    print('parameters:')
    print('dataset name:', ds_name)
    for label, value in [('mpg_options:', mpg_options),
                         ('kernel_options:', kernel_options),
                         ('ged_options:', ged_options),
                         ('mge_options:', mge_options),
                         ('save_results:', save_results),
                         ('irrelevant_labels:', irrelevant_labels)]:
        print(label, value)
    print()

    # ---- generate pre-images, once per fit method ----
    # NOTE: 'random' appears three times on purpose — one pass per entry.
    for fit_method in ['k-graphs', 'expert', 'random', 'random', 'random']:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(
            ds_name, mpg_options, kernel_options, ged_options, mge_options,
            save_results=save_results, save_medians=True, plot_medians=True,
            load_gm='auto', dir_save=dir_save,
            irrelevant_labels=irrelevant_labels, edge_required=edge_required)


def xp_median_preimage_8_1():
"""xp 8_1: Monoterpenoides, sspkernel, using CONSTANT.
"""
@@ -546,4 +610,7 @@ if __name__ == "__main__":
# xp_median_preimage_7_1()
#### xp 8_1: Monoterpenoides, sspkernel, using CONSTANT.
xp_median_preimage_8_1()
# xp_median_preimage_8_1()

#### xp 9_1: Acyclic, sspkernel, using CONSTANT.
xp_median_preimage_9_1()

+ 2
- 1
gklearn/utils/dataset.py View File

@@ -68,7 +68,8 @@ class Dataset(object):
def load_predefined_dataset(self, ds_name):
current_path = os.path.dirname(os.path.realpath(__file__)) + '/'
if ds_name == 'Acyclic':
pass
ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'COIL-DEL':
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)


+ 25
- 37
gklearn/utils/graph_files.py View File

@@ -720,38 +720,26 @@ def load_from_ds(filename, filename_targets):
label_names = {'node_labels': [], 'edge_labels': [], 'node_attrs': [], 'edge_attrs': []}
content = open(filename).read().splitlines()
extension = splitext(content[0].split(' ')[0])[1][1:]
if extension == 'ct':
load_file_fun = load_ct
elif extension == 'gxl':
load_file_fun = load_gxl
if filename_targets is None or filename_targets == '':
if extension == 'ct':
for i in range(0, len(content)):
tmp = content[i].split(' ')
# remove the '#'s in file names
g, l_names = load_ct(dirname_dataset + '/' + tmp[0].replace('#', '', 1))
data.append(g)
__append_label_names(label_names, l_names)
y.append(float(tmp[1]))
elif extension == 'gxl':
for i in range(0, len(content)):
tmp = content[i].split(' ')
# remove the '#'s in file names
g, l_names = load_gxl(dirname_dataset + '/' + tmp[0].replace('#', '', 1))
data.append(g)
__append_label_names(label_names, l_names)
y.append(float(tmp[1]))
else: # y in a seperate file
if extension == 'ct':
for i in range(0, len(content)):
tmp = content[i]
# remove the '#'s in file names
g, l_names = load_ct(dirname_dataset + '/' + tmp.replace('#', '', 1))
data.append(g)
__append_label_names(label_names, l_names)
elif extension == 'gxl':
for i in range(0, len(content)):
tmp = content[i]
# remove the '#'s in file names
g, l_names = load_gxl(dirname_dataset + '/' + tmp.replace('#', '', 1))
data.append(g)
__append_label_names(label_names, l_names)
for i in range(0, len(content)):
tmp = content[i].split(' ')
# remove the '#'s in file names
g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1))
data.append(g)
__append_label_names(label_names, l_names)
y.append(float(tmp[1]))
else: # targets in a seperate file
for i in range(0, len(content)):
tmp = content[i]
# remove the '#'s in file names
g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1))
data.append(g)
__append_label_names(label_names, l_names)
content_y = open(filename_targets).read().splitlines()
# assume entries in filename and filename_targets have the same order.
@@ -774,16 +762,16 @@ if __name__ == '__main__':
# ds = {'name': 'Alkane', 'dataset': '../../datasets/Alkane/dataset.ds',
# 'dataset_y': '../../datasets/Alkane/dataset_boiling_point_names.txt'}
# Gn, y = loadDataset(ds['dataset'], filename_y=ds['dataset_y'])
ds_file = '../../datasets/acyclic/dataset_bps.ds' # node symb
Gn, targets, label_names = load_dataset(ds_file)
# ds_file = '../../datasets/Acyclic/dataset_bps.ds' # node symb
# Gn, targets, label_names = load_dataset(ds_file)
## ds = {'name': 'MAO', 'dataset': '../../datasets/MAO/dataset.ds'} # node/edge symb
## Gn, y = loadDataset(ds['dataset'])
## ds = {'name': 'PAH', 'dataset': '../../datasets/PAH/dataset.ds'} # unlabeled
## Gn, y = loadDataset(ds['dataset'])
print(Gn[1].graph)
print(Gn[1].nodes(data=True))
print(Gn[1].edges(data=True))
print(targets[1])
# print(Gn[1].graph)
# print(Gn[1].nodes(data=True))
# print(Gn[1].edges(data=True))
# print(targets[1])
# # .gxl file.
# ds_file = '../../datasets/monoterpenoides/dataset_10+.ds' # node/edge symb


Loading…
Cancel
Save