Browse Source

Update function gklearn.utils.graph_file.load_gxl()

v0.2.x
jajupmochi 5 years ago
parent
commit
9f0090179d
4 changed files with 606 additions and 36 deletions
  1. +0
    -2
      .travis.yml
  2. +549
    -0
      gklearn/preimage/experiments/xp_median_preimage.py
  3. +4
    -1
      gklearn/utils/dataset.py
  4. +53
    -33
      gklearn/utils/graph_files.py

+ 0
- 2
.travis.yml View File

@@ -1,7 +1,5 @@
language: python
python:
- '3'
- '3.4'
- '3.5'
- '3.6'
- '3.7'


+ 549
- 0
gklearn/preimage/experiments/xp_median_preimage.py View File

@@ -0,0 +1,549 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 14 15:39:29 2020

@author: ljia
"""
import multiprocessing
import functools
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from gklearn.preimage.utils import generate_median_preimages_by_class
from gklearn.utils import compute_gram_matrices_by_class


def xp_median_preimage_8_1():
    """Experiment 8_1: Monoterpenoides with the structural shortest-path
    kernel (sspkernel) and the CONSTANT GED edit cost.
    """
    # Dataset and median-preimage-generator options.
    ds_name = 'Monoterpenoides'
    mpg_options = dict(fit_method='k-graphs',
                       init_ecc=[3, 3, 1, 3, 3, 1],
                       ds_name=ds_name,
                       parallel=True,  # False
                       time_limit_in_sec=0,
                       max_itrs=100,
                       max_itrs_without_update=3,
                       epsilon_residual=0.01,
                       epsilon_ec=0.1,
                       verbose=2)
    # Graph-kernel options: mixed symbolic/non-symbolic sub-kernels shared
    # by node and edge comparisons.
    mix_kernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    base_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mix_kernel)
    kernel_options = dict(name='structuralspkernel',
                          edge_weight=None,
                          node_kernels=base_kernels,
                          edge_kernels=base_kernels,
                          compute_method='naive',
                          parallel='imap_unordered',  # or None
                          n_jobs=multiprocessing.cpu_count(),
                          normalize=True,
                          verbose=2)
    # Graph-edit-distance options (IPFP with CONSTANT edit costs).
    ged_options = dict(method='IPFP',
                       initialization_method='RANDOM',  # 'NODE'
                       initial_solutions=10,  # 1
                       edit_cost='CONSTANT',
                       attr_distance='euclidean',
                       ratio_runs_from_initial_solutions=1,
                       threads=multiprocessing.cpu_count(),
                       init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
    # Median-graph-estimator options.
    mge_options = dict(init_type='MEDOID',
                       random_inits=10,
                       time_limit=600,
                       verbose=2,
                       refine=False)
    save_results = True
    dir_save = '../results/xp_median_preimage/'
    irrelevant_labels = None
    edge_required = False
    # Echo the settings before running.
    print('parameters:')
    for label, value in [('dataset name', ds_name),
                         ('mpg_options', mpg_options),
                         ('kernel_options', kernel_options),
                         ('ged_options', ged_options),
                         ('mge_options', mge_options),
                         ('save_results', save_results),
                         ('irrelevant_labels', irrelevant_labels)]:
        print(label + ':', value)
    print()
    # Generate preimages once per fit method ('random' repeated for
    # independent trials).
    for fit_method in ['k-graphs', 'expert', 'random', 'random', 'random']:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)

def xp_median_preimage_7_1():
    """Experiment 7_1: MUTAG with the structural shortest-path kernel
    (sspkernel) and the CONSTANT GED edit cost.
    """
    # Dataset and median-preimage-generator options.
    ds_name = 'MUTAG'
    mpg_options = dict(fit_method='k-graphs',
                       init_ecc=[4, 4, 2, 1, 1, 1],
                       ds_name=ds_name,
                       parallel=True,  # False
                       time_limit_in_sec=0,
                       max_itrs=100,
                       max_itrs_without_update=3,
                       epsilon_residual=0.01,
                       epsilon_ec=0.1,
                       verbose=2)
    # Graph-kernel options: mixed symbolic/non-symbolic sub-kernels shared
    # by node and edge comparisons.
    mix_kernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    base_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mix_kernel)
    kernel_options = dict(name='structuralspkernel',
                          edge_weight=None,
                          node_kernels=base_kernels,
                          edge_kernels=base_kernels,
                          compute_method='naive',
                          parallel='imap_unordered',  # or None
                          n_jobs=multiprocessing.cpu_count(),
                          normalize=True,
                          verbose=2)
    # Graph-edit-distance options (IPFP with CONSTANT edit costs).
    ged_options = dict(method='IPFP',
                       initialization_method='RANDOM',  # 'NODE'
                       initial_solutions=10,  # 1
                       edit_cost='CONSTANT',
                       attr_distance='euclidean',
                       ratio_runs_from_initial_solutions=1,
                       threads=multiprocessing.cpu_count(),
                       init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
    # Median-graph-estimator options.
    mge_options = dict(init_type='MEDOID',
                       random_inits=10,
                       time_limit=600,
                       verbose=2,
                       refine=False)
    save_results = True
    dir_save = '../results/xp_median_preimage/'
    irrelevant_labels = None
    edge_required = False
    # Echo the settings before running.
    print('parameters:')
    for label, value in [('dataset name', ds_name),
                         ('mpg_options', mpg_options),
                         ('kernel_options', kernel_options),
                         ('ged_options', ged_options),
                         ('mge_options', mge_options),
                         ('save_results', save_results),
                         ('irrelevant_labels', irrelevant_labels)]:
        print(label + ':', value)
    print()
    # Generate preimages once per fit method ('random' repeated for
    # independent trials).
    for fit_method in ['k-graphs', 'expert', 'random', 'random', 'random']:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)


def xp_median_preimage_6_1():
    """Experiment 6_1: COIL-RAG with the structural shortest-path kernel
    (sspkernel) and the NON_SYMBOLIC GED edit cost.
    """
    # Dataset and median-preimage-generator options.
    ds_name = 'COIL-RAG'
    mpg_options = dict(fit_method='k-graphs',
                       init_ecc=[3, 3, 1, 3, 3, 1],
                       ds_name=ds_name,
                       parallel=True,  # False
                       time_limit_in_sec=0,
                       max_itrs=100,
                       max_itrs_without_update=3,
                       epsilon_residual=0.01,
                       epsilon_ec=0.1,
                       verbose=2)
    # Graph-kernel options: mixed symbolic/non-symbolic sub-kernels shared
    # by node and edge comparisons.
    mix_kernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    base_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mix_kernel)
    kernel_options = dict(name='structuralspkernel',
                          edge_weight=None,
                          node_kernels=base_kernels,
                          edge_kernels=base_kernels,
                          compute_method='naive',
                          parallel='imap_unordered',  # or None
                          n_jobs=multiprocessing.cpu_count(),
                          normalize=True,
                          verbose=2)
    # Graph-edit-distance options (IPFP with NON_SYMBOLIC edit costs).
    ged_options = dict(method='IPFP',
                       initialization_method='RANDOM',  # 'NODE'
                       initial_solutions=10,  # 1
                       edit_cost='NON_SYMBOLIC',
                       attr_distance='euclidean',
                       ratio_runs_from_initial_solutions=1,
                       threads=multiprocessing.cpu_count(),
                       init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
    # Median-graph-estimator options.
    mge_options = dict(init_type='MEDOID',
                       random_inits=10,
                       time_limit=600,
                       verbose=2,
                       refine=False)
    save_results = True
    dir_save = '../results/xp_median_preimage/'
    irrelevant_labels = None
    edge_required = False
    # Echo the settings before running.
    print('parameters:')
    for label, value in [('dataset name', ds_name),
                         ('mpg_options', mpg_options),
                         ('kernel_options', kernel_options),
                         ('ged_options', ged_options),
                         ('mge_options', mge_options),
                         ('save_results', save_results),
                         ('irrelevant_labels', irrelevant_labels)]:
        print(label + ':', value)
    print()
    # Generate preimages once per fit method (no 'expert' costs for this
    # dataset; 'random' repeated for independent trials).
    for fit_method in ['k-graphs', 'random', 'random', 'random']:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)


def xp_median_preimage_5_1():
    """Experiment 5_1: FRANKENSTEIN with the structural shortest-path kernel
    (sspkernel) and the NON_SYMBOLIC GED edit cost.
    """
    # Dataset and median-preimage-generator options.
    ds_name = 'FRANKENSTEIN'
    mpg_options = dict(fit_method='k-graphs',
                       init_ecc=[3, 3, 1, 3, 3, 0],
                       ds_name=ds_name,
                       parallel=True,  # False
                       time_limit_in_sec=0,
                       max_itrs=100,
                       max_itrs_without_update=3,
                       epsilon_residual=0.01,
                       epsilon_ec=0.1,
                       verbose=2)
    # Graph-kernel options: mixed symbolic/non-symbolic sub-kernels shared
    # by node and edge comparisons.
    mix_kernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    base_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mix_kernel)
    kernel_options = dict(name='structuralspkernel',
                          edge_weight=None,
                          node_kernels=base_kernels,
                          edge_kernels=base_kernels,
                          compute_method='naive',
                          parallel='imap_unordered',  # or None
                          n_jobs=multiprocessing.cpu_count(),
                          normalize=True,
                          verbose=2)
    # Graph-edit-distance options (IPFP with NON_SYMBOLIC edit costs).
    ged_options = dict(method='IPFP',
                       initialization_method='RANDOM',  # 'NODE'
                       initial_solutions=10,  # 1
                       edit_cost='NON_SYMBOLIC',
                       attr_distance='euclidean',
                       ratio_runs_from_initial_solutions=1,
                       threads=multiprocessing.cpu_count(),
                       init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
    # Median-graph-estimator options.
    mge_options = dict(init_type='MEDOID',
                       random_inits=10,
                       time_limit=600,
                       verbose=2,
                       refine=False)
    save_results = True
    dir_save = '../results/xp_median_preimage/'
    irrelevant_labels = None
    edge_required = False
    # Echo the settings before running.
    print('parameters:')
    for label, value in [('dataset name', ds_name),
                         ('mpg_options', mpg_options),
                         ('kernel_options', kernel_options),
                         ('ged_options', ged_options),
                         ('mge_options', mge_options),
                         ('save_results', save_results),
                         ('irrelevant_labels', irrelevant_labels)]:
        print(label + ':', value)
    print()
    # Generate preimages once per fit method (no 'expert' costs for this
    # dataset; 'random' repeated for independent trials).
    for fit_method in ['k-graphs', 'random', 'random', 'random']:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)

def xp_median_preimage_4_1():
    """Experiment 4_1: COLORS-3 with the structural shortest-path kernel
    (sspkernel) and the NON_SYMBOLIC GED edit cost.
    """
    # Dataset and median-preimage-generator options.
    ds_name = 'COLORS-3'
    mpg_options = dict(fit_method='k-graphs',
                       init_ecc=[3, 3, 1, 3, 3, 0],
                       ds_name=ds_name,
                       parallel=True,  # False
                       time_limit_in_sec=0,
                       max_itrs=100,
                       max_itrs_without_update=3,
                       epsilon_residual=0.01,
                       epsilon_ec=0.1,
                       verbose=2)
    # Graph-kernel options: mixed symbolic/non-symbolic sub-kernels shared
    # by node and edge comparisons.
    mix_kernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    base_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mix_kernel)
    kernel_options = dict(name='structuralspkernel',
                          edge_weight=None,
                          node_kernels=base_kernels,
                          edge_kernels=base_kernels,
                          compute_method='naive',
                          parallel='imap_unordered',  # or None
                          n_jobs=multiprocessing.cpu_count(),
                          normalize=True,
                          verbose=2)
    # Graph-edit-distance options (IPFP with NON_SYMBOLIC edit costs).
    ged_options = dict(method='IPFP',
                       initialization_method='RANDOM',  # 'NODE'
                       initial_solutions=10,  # 1
                       edit_cost='NON_SYMBOLIC',
                       attr_distance='euclidean',
                       ratio_runs_from_initial_solutions=1,
                       threads=multiprocessing.cpu_count(),
                       init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
    # Median-graph-estimator options.
    mge_options = dict(init_type='MEDOID',
                       random_inits=10,
                       time_limit=600,
                       verbose=2,
                       refine=False)
    save_results = True
    dir_save = '../results/xp_median_preimage/'
    irrelevant_labels = None
    edge_required = False
    # Echo the settings before running.
    print('parameters:')
    for label, value in [('dataset name', ds_name),
                         ('mpg_options', mpg_options),
                         ('kernel_options', kernel_options),
                         ('ged_options', ged_options),
                         ('mge_options', mge_options),
                         ('save_results', save_results),
                         ('irrelevant_labels', irrelevant_labels)]:
        print(label + ':', value)
    print()
    # Generate preimages once per fit method (no 'expert' costs for this
    # dataset; 'random' repeated for independent trials).
    for fit_method in ['k-graphs', 'random', 'random', 'random']:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)


def xp_median_preimage_3_1():
    """Experiment 3_1: Fingerprint with the structural shortest-path kernel
    (sspkernel) and the LETTER2 GED edit cost; node attributes only (edge
    attributes are discarded as irrelevant).
    """
    # Dataset and median-preimage-generator options.
    ds_name = 'Fingerprint'
    mpg_options = dict(fit_method='k-graphs',
                       init_ecc=[0.525, 0.525, 0.001, 0.125, 0.125],
                       ds_name=ds_name,
                       parallel=True,  # False
                       time_limit_in_sec=0,
                       max_itrs=100,
                       max_itrs_without_update=3,
                       epsilon_residual=0.01,
                       epsilon_ec=0.1,
                       verbose=2)
    # Graph-kernel options: mixed symbolic/non-symbolic sub-kernels shared
    # by node and edge comparisons.
    mix_kernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    base_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mix_kernel)
    kernel_options = dict(name='structuralspkernel',
                          edge_weight=None,
                          node_kernels=base_kernels,
                          edge_kernels=base_kernels,
                          compute_method='naive',
                          parallel='imap_unordered',  # or None
                          n_jobs=multiprocessing.cpu_count(),
                          normalize=True,
                          verbose=2)
    # Graph-edit-distance options (IPFP with LETTER2 edit costs).
    ged_options = dict(method='IPFP',
                       initialization_method='RANDOM',  # 'NODE'
                       initial_solutions=10,  # 1
                       edit_cost='LETTER2',
                       attr_distance='euclidean',
                       ratio_runs_from_initial_solutions=1,
                       threads=multiprocessing.cpu_count(),
                       init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
    # Median-graph-estimator options.
    mge_options = dict(init_type='MEDOID',
                       random_inits=10,
                       time_limit=600,
                       verbose=2,
                       refine=False)
    save_results = True
    dir_save = '../results/xp_median_preimage/'
    # Drop these edge attributes before computing kernels/GED.
    irrelevant_labels = {'edge_attrs': ['orient', 'angle']}
    edge_required = False
    # Echo the settings before running.
    print('parameters:')
    for label, value in [('dataset name', ds_name),
                         ('mpg_options', mpg_options),
                         ('kernel_options', kernel_options),
                         ('ged_options', ged_options),
                         ('mge_options', mge_options),
                         ('save_results', save_results),
                         ('irrelevant_labels', irrelevant_labels)]:
        print(label + ':', value)
    print()
    # Generate preimages once per fit method (no 'expert' costs for this
    # dataset; 'random' repeated for independent trials).
    for fit_method in ['k-graphs', 'random', 'random', 'random']:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required)

def xp_median_preimage_2_1():
    """Experiment 2_1: COIL-DEL with the structural shortest-path kernel
    (sspkernel) and the LETTER2 GED edit cost; node attributes only (the
    'valence' edge label is discarded as irrelevant).
    """
    # Dataset and median-preimage-generator options.
    ds_name = 'COIL-DEL'
    mpg_options = dict(fit_method='k-graphs',
                       init_ecc=[3, 3, 1, 3, 3],
                       ds_name=ds_name,
                       parallel=True,  # False
                       time_limit_in_sec=0,
                       max_itrs=100,
                       max_itrs_without_update=3,
                       epsilon_residual=0.01,
                       epsilon_ec=0.1,
                       verbose=2)
    # Graph-kernel options: mixed symbolic/non-symbolic sub-kernels shared
    # by node and edge comparisons.
    mix_kernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    base_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mix_kernel)
    kernel_options = dict(name='structuralspkernel',
                          edge_weight=None,
                          node_kernels=base_kernels,
                          edge_kernels=base_kernels,
                          compute_method='naive',
                          parallel='imap_unordered',  # or None
                          n_jobs=multiprocessing.cpu_count(),
                          normalize=True,
                          verbose=2)
    # Graph-edit-distance options (IPFP with LETTER2 edit costs).
    ged_options = dict(method='IPFP',
                       initialization_method='RANDOM',  # 'NODE'
                       initial_solutions=10,  # 1
                       edit_cost='LETTER2',
                       attr_distance='euclidean',
                       ratio_runs_from_initial_solutions=1,
                       threads=multiprocessing.cpu_count(),
                       init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
    # Median-graph-estimator options.
    mge_options = dict(init_type='MEDOID',
                       random_inits=10,
                       time_limit=600,
                       verbose=2,
                       refine=False)
    save_results = True
    dir_save = '../results/xp_median_preimage/'
    # Drop the 'valence' edge label before computing kernels/GED.
    irrelevant_labels = {'edge_labels': ['valence']}
    # Echo the settings before running.
    print('parameters:')
    for label, value in [('dataset name', ds_name),
                         ('mpg_options', mpg_options),
                         ('kernel_options', kernel_options),
                         ('ged_options', ged_options),
                         ('mge_options', mge_options),
                         ('save_results', save_results),
                         ('irrelevant_labels', irrelevant_labels)]:
        print(label + ':', value)
    print()
    # Optional a-priori Gram-matrix precomputation per class, disabled:
    # print('Compute gram matrices for each class a priori.')
    # compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, dir_save=dir_save, irrelevant_labels=irrelevant_labels)
    # Generate preimages once per fit method (no 'expert' costs for this
    # dataset; 'random' repeated for independent trials).
    for fit_method in ['k-graphs', 'random', 'random', 'random']:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels)


def xp_median_preimage_1_1():
    """Experiment 1_1: Letter-high with the structural shortest-path kernel
    (sspkernel) and the LETTER2 GED edit cost.
    """
    # Dataset and median-preimage-generator options.
    ds_name = 'Letter-high'
    mpg_options = dict(fit_method='k-graphs',
                       init_ecc=[3, 3, 1, 3, 3],
                       ds_name=ds_name,
                       parallel=True,  # False
                       time_limit_in_sec=0,
                       max_itrs=100,
                       max_itrs_without_update=3,
                       epsilon_residual=0.01,
                       epsilon_ec=0.1,
                       verbose=2)
    # Graph-kernel options: mixed symbolic/non-symbolic sub-kernels shared
    # by node and edge comparisons.
    mix_kernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
    base_kernels = dict(symb=deltakernel, nsymb=gaussiankernel, mix=mix_kernel)
    kernel_options = dict(name='structuralspkernel',
                          edge_weight=None,
                          node_kernels=base_kernels,
                          edge_kernels=base_kernels,
                          compute_method='naive',
                          parallel='imap_unordered',  # or None
                          n_jobs=multiprocessing.cpu_count(),
                          normalize=True,
                          verbose=2)
    # Graph-edit-distance options (IPFP with LETTER2 edit costs; a single
    # initial solution here, unlike the other experiments' 10).
    ged_options = dict(method='IPFP',
                       initialization_method='RANDOM',  # 'NODE'
                       initial_solutions=1,
                       edit_cost='LETTER2',
                       attr_distance='euclidean',
                       ratio_runs_from_initial_solutions=1,
                       threads=multiprocessing.cpu_count(),
                       init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
    # Median-graph-estimator options.
    mge_options = dict(init_type='MEDOID',
                       random_inits=10,
                       time_limit=600,
                       verbose=2,
                       refine=False)
    save_results = True
    # Echo the settings before running.
    print('parameters:')
    for label, value in [('dataset name', ds_name),
                         ('mpg_options', mpg_options),
                         ('kernel_options', kernel_options),
                         ('ged_options', ged_options),
                         ('mge_options', mge_options),
                         ('save_results', save_results)]:
        print(label + ':', value)
    # Generate preimages once per fit method ('random' repeated for
    # independent trials).
    for fit_method in ['k-graphs', 'expert', 'random', 'random', 'random']:
        print('\n-------------------------------------')
        print('fit method:', fit_method, '\n')
        mpg_options['fit_method'] = fit_method
        generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save='../results/xp_median_preimage/')


if __name__ == "__main__":
    # Each experiment is a self-contained run; uncomment the one(s) to
    # execute. Only xp 8_1 is currently active.

    # xp 1_1: Letter-high, sspkernel.
    # xp_median_preimage_1_1()
    # xp 2_1: COIL-DEL, sspkernel, using LETTER2, only node attrs.
    # xp_median_preimage_2_1()
    # xp 3_1: Fingerprint, sspkernel, using LETTER2, only node attrs.
    # xp_median_preimage_3_1()

    # xp 4_1: COLORS-3, sspkernel, using NON_SYMBOLIC.
    # xp_median_preimage_4_1()
    # xp 5_1: FRANKENSTEIN, sspkernel, using NON_SYMBOLIC.
    # xp_median_preimage_5_1()
    # xp 6_1: COIL-RAG, sspkernel, using NON_SYMBOLIC.
    # xp_median_preimage_6_1()

    # xp 7_1: MUTAG, sspkernel, using CONSTANT.
    # xp_median_preimage_7_1()
    # xp 8_1: Monoterpenoides, sspkernel, using CONSTANT.
    xp_median_preimage_8_1()

+ 4
- 1
gklearn/utils/dataset.py View File

@@ -67,7 +67,7 @@ class Dataset(object):
def load_predefined_dataset(self, ds_name):
current_path = os.path.dirname(os.path.realpath(__file__)) + '/'
if ds_name == 'acyclic':
if ds_name == 'Acyclic':
pass
elif ds_name == 'COIL-DEL':
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
@@ -93,6 +93,9 @@ class Dataset(object):
elif ds_name == 'Letter-med': # node non-symb
ds_file = current_path + '../../datasets/Letter-high/Letter-med_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'Monoterpenoides':
ds_file = current_path + '../../datasets/Monoterpenoides/dataset_10+.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
elif ds_name == 'MUTAG':
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)


+ 53
- 33
gklearn/utils/graph_files.py View File

@@ -47,7 +47,7 @@ def load_dataset(filename, filename_targets=None, gformat=None, **kwargs):
"""
extension = splitext(filename)[1][1:]
if extension == "ds":
data, y = loadFromDS(filename, filename_targets)
data, y, label_names = load_from_ds(filename, filename_targets)
elif extension == "cxl":
import xml.etree.ElementTree as ET
@@ -59,7 +59,7 @@ def load_dataset(filename, filename_targets=None, gformat=None, **kwargs):
for graph in root.iter('graph'):
mol_filename = graph.attrib['file']
mol_class = graph.attrib['class']
data.append(loadGXL(dirname_dataset + '/' + mol_filename))
data.append(load_gxl(dirname_dataset + '/' + mol_filename))
y.append(mol_class)
elif extension == 'xml':
dir_dataset = kwargs.get('dirname_dataset', None)
@@ -127,7 +127,7 @@ def save_dataset(Gn, y, gformat='gxl', group=None, filename='gfile', xparams=Non
fgroup.close()


def loadCT(filename):
def load_ct(filename):
"""load data from a Chemical Table (.ct) file.

Notes
@@ -180,7 +180,7 @@ def loadCT(filename):
return g


def loadGXL(filename):
def load_gxl(filename): # @todo: directed graphs.
from os.path import basename
import networkx as nx
import xml.etree.ElementTree as ET
@@ -195,9 +195,6 @@ def loadGXL(filename):
labels = {}
for attr in node.iter('attr'):
labels[attr.attrib['name']] = attr[0].text
if 'chem' in labels:
labels['label'] = labels['chem']
labels['atom'] = labels['chem']
g.add_node(index, **labels)
index += 1

@@ -205,11 +202,26 @@ def loadGXL(filename):
labels = {}
for attr in edge.iter('attr'):
labels[attr.attrib['name']] = attr[0].text
if 'valence' in labels:
labels['label'] = labels['valence']
labels['bond_type'] = labels['valence']
g.add_edge(dic[edge.attrib['from']], dic[edge.attrib['to']], **labels)
return g
# get label names.
label_names = {'node_labels': [], 'edge_labels': [], 'node_attrs': [], 'edge_attrs': []}
for node in root.iter('node'):
for attr in node.iter('attr'):
if attr[0].tag == 'int': # @todo: this maybe wrong, and slow.
label_names['node_labels'].append(attr.attrib['name'])
else:
label_names['node_attrs'].append(attr.attrib['name'])
break
for edge in root.iter('edge'):
for attr in edge.iter('attr'):
if attr[0].tag == 'int': # @todo: this maybe wrong, and slow.
label_names['edge_labels'].append(attr.attrib['name'])
else:
label_names['edge_attrs'].append(attr.attrib['name'])
break

return g, label_names


def saveGXL(graph, filename, method='default', node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
@@ -649,43 +661,49 @@ def loadFromXML(filename, dir_dataset=None):
for graph in root.iter('graph'):
mol_filename = graph.attrib['file']
mol_class = graph.attrib['class']
data.append(loadGXL(dir_dataset + '/' + mol_filename))
data.append(load_gxl(dir_dataset + '/' + mol_filename))
y.append(mol_class)
return data, y

def loadFromDS(filename, filename_y):
def load_from_ds(filename, filename_targets):
"""Load data from .ds file.

Possible graph formats include:

'.ct': see function loadCT for detail.
'.ct': see function load_ct for detail.

'.gxl': see dunction loadGXL for detail.
'.gxl': see function load_gxl for detail.

Note these graph formats are checked automatically by the extensions of
graph files.
"""
def append_label_names(label_names, new_names):
for key, val in label_names.items():
label_names[key] += [name for name in new_names[key] if name not in val]
dirname_dataset = dirname(filename)
data = []
y = []
label_names = {'node_labels': [], 'edge_labels': [], 'node_attrs': [], 'edge_attrs': []}
content = open(filename).read().splitlines()
extension = splitext(content[0].split(' ')[0])[1][1:]
if filename_y is None or filename_y == '':
if filename_targets is None or filename_targets == '':
if extension == 'ct':
for i in range(0, len(content)):
tmp = content[i].split(' ')
# remove the '#'s in file names
data.append(
loadCT(dirname_dataset + '/' + tmp[0].replace('#', '', 1)))
load_ct(dirname_dataset + '/' + tmp[0].replace('#', '', 1)))
y.append(float(tmp[1]))
elif extension == 'gxl':
for i in range(0, len(content)):
tmp = content[i].split(' ')
# remove the '#'s in file names
data.append(
loadGXL(dirname_dataset + '/' + tmp[0].replace('#', '', 1)))
g, l_names = load_gxl(dirname_dataset + '/' + tmp[0].replace('#', '', 1))
data.append(g)
append_label_names(label_names, l_names)
y.append(float(tmp[1]))
else: # y in a separate file
if extension == 'ct':
@@ -693,22 +711,23 @@ def loadFromDS(filename, filename_y):
tmp = content[i]
# remove the '#'s in file names
data.append(
loadCT(dirname_dataset + '/' + tmp.replace('#', '', 1)))
load_ct(dirname_dataset + '/' + tmp.replace('#', '', 1)))
elif extension == 'gxl':
for i in range(0, len(content)):
tmp = content[i]
# remove the '#'s in file names
data.append(
loadGXL(dirname_dataset + '/' + tmp.replace('#', '', 1)))
g, l_names = load_gxl(dirname_dataset + '/' + tmp[0].replace('#', '', 1))
data.append(g)
append_label_names(label_names, l_names)
content_y = open(filename_y).read().splitlines()
# assume entries in filename and filename_y have the same order.
content_y = open(filename_targets).read().splitlines()
# assume entries in filename and filename_targets have the same order.
for item in content_y:
tmp = item.split(' ')
# assume the 3rd entry in a line is y (for Alkane dataset)
y.append(float(tmp[2]))
return data, y
return data, y, label_names
if __name__ == '__main__':
@@ -727,13 +746,14 @@ if __name__ == '__main__':
# print(Gn[1].edges(data=True))
# print(y[1])
# # .gxl file.
# ds = {'name': 'monoterpenoides',
# 'dataset': '../../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
# Gn, y = loadDataset(ds['dataset'])
# print(Gn[1].nodes(data=True))
# print(Gn[1].edges(data=True))
# print(y[1])
# .gxl file.
ds = {'name': 'monoterpenoides',
'dataset': '../../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb
Gn, y, label_names = load_dataset(ds['dataset'])
print(Gn[1].graph)
print(Gn[1].nodes(data=True))
print(Gn[1].edges(data=True))
print(y[1])
# ### Convert graph from one format to another.
# # .gxl file.
@@ -774,5 +794,5 @@ if __name__ == '__main__':
# dataset = '../../datasets/Letter-med/Letter-med_A.txt'
# dataset = '../../datasets/AIDS/AIDS_A.txt'
# dataset = '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'
# Gn, targets = load_dataset(filename)
# Gn, targets, label_names = load_dataset(filename)
pass

Loading…
Cancel
Save