
Update comments of tool functions.

v0.1
jajupmochi 6 years ago
commit 221b528cb5
3 changed files with 116 additions and 13 deletions
  1. +51 -0   pygraph/utils/graphdataset.py
  2. +33 -1   pygraph/utils/graphfiles.py
  3. +32 -12  pygraph/utils/model_selection_precomputed.py

+51 -0  pygraph/utils/graphdataset.py

@@ -7,6 +7,57 @@ def get_dataset_attributes(Gn,
attr_names=[],
node_label=None,
edge_label=None):
"""Returns the structure and property information of the graph dataset Gn.

Parameters
----------
Gn : List of NetworkX graph
List of graphs whose information will be returned.
target : list
The list of classification targets corresponding to Gn. Only works for
classification problems.
attr_names : list
List of strings indicating which information will be returned. The
possible choices include:
'substructures': sub-structures Gn contains, including 'linear', 'non
linear' and 'cyclic'.
'node_labeled': whether vertices have symbolic labels.
'edge_labeled': whether edges have symbolic labels.
'is_directed': whether graphs in Gn are directed.
'dataset_size': number of graphs in Gn.
'ave_node_num': average number of vertices of graphs in Gn.
'min_node_num': minimum number of vertices of graphs in Gn.
'max_node_num': maximum number of vertices of graphs in Gn.
'ave_edge_num': average number of edges of graphs in Gn.
'min_edge_num': minimum number of edges of graphs in Gn.
'max_edge_num': maximum number of edges of graphs in Gn.
'ave_node_degree': average vertex degree of graphs in Gn.
'min_node_degree': minimum vertex degree of graphs in Gn.
'max_node_degree': maximum vertex degree of graphs in Gn.
'ave_fill_factor': average fill factor (number_of_edges /
(number_of_nodes ** 2)) of graphs in Gn.
'min_fill_factor': minimum fill factor of graphs in Gn.
'max_fill_factor': maximum fill factor of graphs in Gn.
'node_label_num': number of symbolic vertex labels.
'edge_label_num': number of symbolic edge labels.
'node_attr_dim': number of dimensions of non-symbolic vertex labels.
Extracted from the 'attributes' attribute of graph nodes.
'edge_attr_dim': number of dimensions of non-symbolic edge labels.
Extracted from the 'attributes' attribute of graph edges.
'class_number': number of classes. Only available for classification
problems.
node_label : string
Node attribute used as label. The default node label is 'atom'. Mandatory
when 'node_labeled' or 'node_label_num' is required.
edge_label : string
Edge attribute used as label. The default edge label is 'bond_type'.
Mandatory when 'edge_labeled' or 'edge_label_num' is required.

Return
------
attrs : dict
Value for each property.
"""
import networkx as nx
import numpy as np



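A minimal usage sketch of get_dataset_attributes as documented above; the dataset path is borrowed from the example later in this commit, and the chosen attribute names and label keys are assumptions based on the documented options, not part of the commit itself.

from pygraph.utils.graphfiles import loadDataset
from pygraph.utils.graphdataset import get_dataset_attributes

# Hypothetical dataset path, reused from the docstring example further down.
Gn, y = loadDataset('../datasets/MUTAG/MUTAG_A.txt')

# Query a few of the documented properties. 'atom' and 'bond_type' are the
# documented default label names and may need adjusting for this dataset.
attrs = get_dataset_attributes(
    Gn, target=y,
    attr_names=['node_labeled', 'edge_labeled', 'dataset_size',
                'ave_node_num', 'node_label_num'],
    node_label='atom', edge_label='bond_type')

# attrs is a dict mapping each requested attribute name to its value.
print(attrs)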
+33 -1  pygraph/utils/graphfiles.py

@@ -372,7 +372,39 @@ def loadTXT(dirname_dataset):


def loadDataset(filename, filename_y=None, extra_params=None):
"""load file list of the dataset.
"""Read graph data from filename and load them as NetworkX graphs.

Parameters
----------
filename : string
The name of the file from where the dataset is read.
filename_y : string
The name of file of the targets corresponding to graphs.
extra_params : dict
Extra parameters only designated to '.mat' format.

Return
------
data : List of NetworkX graph.
y : List
Targets corresponding to graphs.

Notes
-----
This function supports the following graph dataset formats:
'ds': load data from .ct files listed in the .ds file. See the comments of
function loadCT for an example.
'cxl': load data from a Graph eXchange Language (.cxl) file. See
http://www.gupro.de/GXL/Introduction/background.html, 2019 for details.
'sdf': load data from a structured data file (.sdf). See
http://www.nonlinear.com/progenesis/sdf-studio/v0.9/faq/sdf-file-format-guidance.aspx,
2018 for details.
'mat': load graph data from a MATLAB (up to version 7.1) .mat file. See the
README in the downloadable file at http://mlcb.is.tuebingen.mpg.de/Mitarbeiter/Nino/WL/,
2018 for details.
'txt': load graph data from a special .txt file. See
https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets,
2019 for details. Note that here filename is the name of any .txt file in
the dataset directory.
"""
from os.path import dirname, splitext


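A brief usage sketch of loadDataset following the Notes above; both paths are the ones used in examples elsewhere in this commit and are assumed to exist relative to the working directory.

from pygraph.utils.graphfiles import loadDataset

# '.ds' format: a list file pointing at .ct graphs (path from the old docstring example).
Gn_ct, y_ct = loadDataset('../../../../datasets/acyclic/Acyclic/dataset_bps.ds')

# '.txt' format: filename is any .txt file inside the dataset directory.
Gn_txt, y_txt = loadDataset('../datasets/MUTAG/MUTAG_A.txt')

# Each call returns the list of NetworkX graphs and the corresponding targets.
print(len(Gn_ct), len(y_ct), len(Gn_txt), len(y_txt))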

+32 -12  pygraph/utils/model_selection_precomputed.py

@@ -34,7 +34,9 @@ def model_selection_for_precomputed_kernel(datafile,
n_jobs=1,
read_gm_from_file=False,
verbose=True):
"""Perform model selection, fitting and testing for precomputed kernels using nested cv. Print out neccessary data during the process then finally the results.
"""Perform model selection, fitting and testing for precomputed kernels
using nested CV. Print out neccessary data during the process then finally
the results.

Parameters
----------
@@ -43,17 +45,31 @@ def model_selection_for_precomputed_kernel(datafile,
estimator : function
Kernel function used to estimate. This function needs to return a Gram matrix.
param_grid_precomputed : dictionary
Dictionary with names (string) of parameters used to calculate gram matrices as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.
Dictionary with names (string) of parameters used to calculate gram
matrices as keys and lists of parameter settings to try as values. This
enables searching over any sequence of parameter settings. Params with
length 1 will be omitted.
param_grid : dictionary
Dictionary with names (string) of parameters used as penelties as keys and lists of parameter settings to try as values. This enables searching over any sequence of parameter settings. Params with length 1 will be omitted.
Dictionary with names (string) of parameters used as penelties as keys
and lists of parameter settings to try as values. This enables
searching over any sequence of parameter settings. Params with length 1
will be omitted.
model_type : string
Typr of the problem, can be regression or classification.
Type of the problem; can be 'regression' or 'classification'.
NUM_TRIALS : integer
Number of random trials of the outer CV loop. The default is 30.
datafile_y : string
Path of file storing y data. This parameter is optional depending on the given dataset file.
Path of file storing y data. This parameter is optional depending on
the given dataset file.
extra_params : dict
Extra parameters for loading the dataset. See function
pygraph.utils.graphfiles.loadDataset for details.
ds_name : string
Name of the dataset.
n_jobs : int
Number of jobs for parallelization.
read_gm_from_file : boolean
Whether gram matrices are loaded from file.
Whether gram matrices are loaded from a file.

Examples
--------
@@ -61,14 +77,18 @@ def model_selection_for_precomputed_kernel(datafile,
>>> import sys
>>> sys.path.insert(0, "../")
>>> from pygraph.utils.model_selection_precomputed import model_selection_for_precomputed_kernel
>>> from pygraph.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
>>> from pygraph.kernels.untilHPathKernel import untilhpathkernel
>>> import numpy as np
>>>
>>> datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'
>>> estimator = weisfeilerlehmankernel
>>> param_grid_precomputed = {'height': [0,1,2,3,4,5,6,7,8,9,10], 'base_kernel': ['subtree']}
>>> param_grid = {"alpha": np.logspace(-2, 2, num = 10, base = 10)}
>>> datafile = '../datasets/MUTAG/MUTAG_A.txt'
>>> estimator = untilhpathkernel
>>> param_grid_precomputed = {'depth': np.linspace(1, 10, 10), 'k_func':
['MinMax', 'tanimoto'], 'compute_method': ['trie']}
>>> # 'C' for classification problems and 'alpha' for regression problems.
>>> param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)}, {'alpha':
np.logspace(-10, 10, num=41, base=10)}]
>>>
>>> model_selection_for_precomputed_kernel(datafile, estimator, param_grid_precomputed, param_grid, 'regression')
>>> model_selection_for_precomputed_kernel(datafile, estimator,
param_grid_precomputed, param_grid[0], 'classification', ds_name='MUTAG')
"""
tqdm.monitor_interval = 0


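A sketch of the regression counterpart to the classification call shown in the Examples section, using param_grid[1] ('alpha') as the docstring comment suggests; the Acyclic dataset path is taken from the replaced example, while the ds_name value is an assumption.

import numpy as np
from pygraph.utils.model_selection_precomputed import model_selection_for_precomputed_kernel
from pygraph.kernels.untilHPathKernel import untilhpathkernel

# Dataset path reused from the previous (regression) docstring example.
datafile = '../../../../datasets/acyclic/Acyclic/dataset_bps.ds'

param_grid_precomputed = {'depth': np.linspace(1, 10, 10),
                          'k_func': ['MinMax', 'tanimoto'],
                          'compute_method': ['trie']}
# 'C' is used for classification problems, 'alpha' for regression problems.
param_grid = [{'C': np.logspace(-10, 10, num=41, base=10)},
              {'alpha': np.logspace(-10, 10, num=41, base=10)}]

# Regression: pass the 'alpha' grid and model_type='regression'.
model_selection_for_precomputed_kernel(datafile, untilhpathkernel,
                                       param_grid_precomputed, param_grid[1],
                                       'regression', ds_name='Acyclic')  # ds_name is assumed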
