Browse Source

Put deprecation warning of gklearn.utils.Dataset inside the __init__().

v0.2.x
jajupmochi 4 years ago
parent
commit
6326e22abc
3 changed files with 459 additions and 459 deletions
  1. +170
    -170
      gklearn/utils/dataset.py
  2. +285
    -285
      gklearn/utils/graphdataset.py
  3. +4
    -4
      gklearn/utils/kernels.py

+ 170
- 170
gklearn/utils/dataset.py View File

@@ -12,13 +12,13 @@ import os




class Dataset(object): class Dataset(object):
import warnings
warnings.simplefilter('always', DeprecationWarning)
warnings.warn('This class has been moved to "gklearn.dataset" module. The class "gklearn.utils.dataset.Dataset" has not been maintained since Nov 12th, 2020 (version 0.2.1) and will be removed since version 0.4.0.', DeprecationWarning)


def __init__(self, filename=None, filename_targets=None, **kwargs): def __init__(self, filename=None, filename_targets=None, **kwargs):
import warnings
warnings.simplefilter('always', DeprecationWarning)
warnings.warn('This class has been moved to "gklearn.dataset" module. The class "gklearn.utils.dataset.Dataset" has not been maintained since Nov 12th, 2020 (version 0.2.1) and will be removed since version 0.4.0.', DeprecationWarning)

if filename is None: if filename is None:
self._graphs = None self._graphs = None
self._targets = None self._targets = None
@@ -28,7 +28,7 @@ class Dataset(object):
self._edge_attrs = None self._edge_attrs = None
else: else:
self.load_dataset(filename, filename_targets=filename_targets, **kwargs) self.load_dataset(filename, filename_targets=filename_targets, **kwargs)
self._substructures = None self._substructures = None
self._node_label_dim = None self._node_label_dim = None
self._edge_label_dim = None self._edge_label_dim = None
@@ -53,8 +53,8 @@ class Dataset(object):
self._node_attr_dim = None self._node_attr_dim = None
self._edge_attr_dim = None self._edge_attr_dim = None
self._class_number = None self._class_number = None
def load_dataset(self, filename, filename_targets=None, **kwargs): def load_dataset(self, filename, filename_targets=None, **kwargs):
self._graphs, self._targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs) self._graphs, self._targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs)
self._node_labels = label_names['node_labels'] self._node_labels = label_names['node_labels']
@@ -62,15 +62,15 @@ class Dataset(object):
self._edge_labels = label_names['edge_labels'] self._edge_labels = label_names['edge_labels']
self._edge_attrs = label_names['edge_attrs'] self._edge_attrs = label_names['edge_attrs']
self.clean_labels() self.clean_labels()
def load_graphs(self, graphs, targets=None): def load_graphs(self, graphs, targets=None):
# this has to be followed by set_labels(). # this has to be followed by set_labels().
self._graphs = graphs self._graphs = graphs
self._targets = targets self._targets = targets
# self.set_labels_attrs() # @todo # self.set_labels_attrs() # @todo
def load_predefined_dataset(self, ds_name): def load_predefined_dataset(self, ds_name):
current_path = os.path.dirname(os.path.realpath(__file__)) + '/' current_path = os.path.dirname(os.path.realpath(__file__)) + '/'
if ds_name == 'Acyclic': if ds_name == 'Acyclic':
@@ -130,7 +130,7 @@ class Dataset(object):
self._graphs, self._targets, label_names = load_dataset(ds_file) self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'NCI109': elif ds_name == 'NCI109':
ds_file = current_path + '../../datasets/NCI109/NCI109_A.txt' ds_file = current_path + '../../datasets/NCI109/NCI109_A.txt'
self._graphs, self._targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'PAH': elif ds_name == 'PAH':
ds_file = current_path + '../../datasets/PAH/dataset.ds' ds_file = current_path + '../../datasets/PAH/dataset.ds'
self._graphs, self._targets, label_names = load_dataset(ds_file) self._graphs, self._targets, label_names = load_dataset(ds_file)
@@ -143,13 +143,13 @@ class Dataset(object):
pass pass
else: else:
raise Exception('The dataset name "', ds_name, '" is not pre-defined.') raise Exception('The dataset name "', ds_name, '" is not pre-defined.')
self._node_labels = label_names['node_labels'] self._node_labels = label_names['node_labels']
self._node_attrs = label_names['node_attrs'] self._node_attrs = label_names['node_attrs']
self._edge_labels = label_names['edge_labels'] self._edge_labels = label_names['edge_labels']
self._edge_attrs = label_names['edge_attrs'] self._edge_attrs = label_names['edge_attrs']
self.clean_labels() self.clean_labels()


def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]):
self._node_labels = node_labels self._node_labels = node_labels
@@ -157,7 +157,7 @@ class Dataset(object):
self._edge_labels = edge_labels self._edge_labels = edge_labels
self._edge_attrs = edge_attrs self._edge_attrs = edge_attrs


def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None): def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None):
# @todo: remove labels which have only one possible values. # @todo: remove labels which have only one possible values.
if node_labels is None: if node_labels is None:
@@ -183,86 +183,86 @@ class Dataset(object):
# if 'attributes' in e[2]: # if 'attributes' in e[2]:
# return len(e[2]['attributes']) # return len(e[2]['attributes'])
# return 0 # return 0
def get_dataset_infos(self, keys=None, params=None): def get_dataset_infos(self, keys=None, params=None):
"""Computes and returns the structure and property information of the graph dataset. """Computes and returns the structure and property information of the graph dataset.
Parameters Parameters
---------- ----------
keys : list, optional keys : list, optional
A list of strings which indicate which informations will be returned. The A list of strings which indicate which informations will be returned. The
possible choices includes: possible choices includes:
'substructures': sub-structures graphs contains, including 'linear', 'non
'substructures': sub-structures graphs contains, including 'linear', 'non
linear' and 'cyclic'. linear' and 'cyclic'.
'node_label_dim': whether vertices have symbolic labels. 'node_label_dim': whether vertices have symbolic labels.
'edge_label_dim': whether egdes have symbolic labels. 'edge_label_dim': whether egdes have symbolic labels.
'directed': whether graphs in dataset are directed. 'directed': whether graphs in dataset are directed.
'dataset_size': number of graphs in dataset. 'dataset_size': number of graphs in dataset.
'total_node_num': total number of vertices of all graphs in dataset. 'total_node_num': total number of vertices of all graphs in dataset.
'ave_node_num': average number of vertices of graphs in dataset. 'ave_node_num': average number of vertices of graphs in dataset.
'min_node_num': minimum number of vertices of graphs in dataset. 'min_node_num': minimum number of vertices of graphs in dataset.
'max_node_num': maximum number of vertices of graphs in dataset. 'max_node_num': maximum number of vertices of graphs in dataset.
'total_edge_num': total number of edges of all graphs in dataset. 'total_edge_num': total number of edges of all graphs in dataset.
'ave_edge_num': average number of edges of graphs in dataset. 'ave_edge_num': average number of edges of graphs in dataset.
'min_edge_num': minimum number of edges of graphs in dataset. 'min_edge_num': minimum number of edges of graphs in dataset.
'max_edge_num': maximum number of edges of graphs in dataset. 'max_edge_num': maximum number of edges of graphs in dataset.
'ave_node_degree': average vertex degree of graphs in dataset. 'ave_node_degree': average vertex degree of graphs in dataset.
'min_node_degree': minimum vertex degree of graphs in dataset. 'min_node_degree': minimum vertex degree of graphs in dataset.
'max_node_degree': maximum vertex degree of graphs in dataset. 'max_node_degree': maximum vertex degree of graphs in dataset.
'ave_fill_factor': average fill factor (number_of_edges /
'ave_fill_factor': average fill factor (number_of_edges /
(number_of_nodes ** 2)) of graphs in dataset. (number_of_nodes ** 2)) of graphs in dataset.
'min_fill_factor': minimum fill factor of graphs in dataset. 'min_fill_factor': minimum fill factor of graphs in dataset.
'max_fill_factor': maximum fill factor of graphs in dataset. 'max_fill_factor': maximum fill factor of graphs in dataset.
'node_label_nums': list of numbers of symbolic vertex labels of graphs in dataset. 'node_label_nums': list of numbers of symbolic vertex labels of graphs in dataset.
'edge_label_nums': list number of symbolic edge labels of graphs in dataset. 'edge_label_nums': list number of symbolic edge labels of graphs in dataset.
'node_attr_dim': number of dimensions of non-symbolic vertex labels.
'node_attr_dim': number of dimensions of non-symbolic vertex labels.
Extracted from the 'attributes' attribute of graph nodes. Extracted from the 'attributes' attribute of graph nodes.
'edge_attr_dim': number of dimensions of non-symbolic edge labels.
'edge_attr_dim': number of dimensions of non-symbolic edge labels.
Extracted from the 'attributes' attribute of graph edges. Extracted from the 'attributes' attribute of graph edges.
'class_number': number of classes. Only available for classification problems. 'class_number': number of classes. Only available for classification problems.
'all_degree_entropy': the entropy of degree distribution of each graph. 'all_degree_entropy': the entropy of degree distribution of each graph.
'ave_degree_entropy': the average entropy of degree distribution of all graphs. 'ave_degree_entropy': the average entropy of degree distribution of all graphs.
All informations above will be returned if `keys` is not given. All informations above will be returned if `keys` is not given.
params: dict of dict, optional params: dict of dict, optional
A dictinary which contains extra parameters for each possible
A dictinary which contains extra parameters for each possible
element in ``keys``. element in ``keys``.
Return Return
------ ------
dict dict
Information of the graph dataset keyed by `keys`. Information of the graph dataset keyed by `keys`.
""" """
infos = {} infos = {}
if keys == None: if keys == None:
keys = [ keys = [
'substructures', 'substructures',
@@ -292,13 +292,13 @@ class Dataset(object):
'all_degree_entropy', 'all_degree_entropy',
'ave_degree_entropy' 'ave_degree_entropy'
] ]
# dataset size # dataset size
if 'dataset_size' in keys: if 'dataset_size' in keys:
if self._dataset_size is None: if self._dataset_size is None:
self._dataset_size = self._get_dataset_size() self._dataset_size = self._get_dataset_size()
infos['dataset_size'] = self._dataset_size infos['dataset_size'] = self._dataset_size
# graph node number # graph node number
if any(i in keys for i in ['total_node_num', 'ave_node_num', 'min_node_num', 'max_node_num']): if any(i in keys for i in ['total_node_num', 'ave_node_num', 'min_node_num', 'max_node_num']):
all_node_nums = self._get_all_node_nums() all_node_nums = self._get_all_node_nums()
@@ -307,22 +307,22 @@ class Dataset(object):
if self._total_node_num is None: if self._total_node_num is None:
self._total_node_num = self._get_total_node_num(all_node_nums) self._total_node_num = self._get_total_node_num(all_node_nums)
infos['total_node_num'] = self._total_node_num infos['total_node_num'] = self._total_node_num
if 'ave_node_num' in keys: if 'ave_node_num' in keys:
if self._ave_node_num is None: if self._ave_node_num is None:
self._ave_node_num = self._get_ave_node_num(all_node_nums) self._ave_node_num = self._get_ave_node_num(all_node_nums)
infos['ave_node_num'] = self._ave_node_num infos['ave_node_num'] = self._ave_node_num
if 'min_node_num' in keys: if 'min_node_num' in keys:
if self._min_node_num is None: if self._min_node_num is None:
self._min_node_num = self._get_min_node_num(all_node_nums) self._min_node_num = self._get_min_node_num(all_node_nums)
infos['min_node_num'] = self._min_node_num infos['min_node_num'] = self._min_node_num
if 'max_node_num' in keys: if 'max_node_num' in keys:
if self._max_node_num is None: if self._max_node_num is None:
self._max_node_num = self._get_max_node_num(all_node_nums) self._max_node_num = self._get_max_node_num(all_node_nums)
infos['max_node_num'] = self._max_node_num infos['max_node_num'] = self._max_node_num
# graph edge number # graph edge number
if any(i in keys for i in ['total_edge_num', 'ave_edge_num', 'min_edge_num', 'max_edge_num']): if any(i in keys for i in ['total_edge_num', 'ave_edge_num', 'min_edge_num', 'max_edge_num']):
all_edge_nums = self._get_all_edge_nums() all_edge_nums = self._get_all_edge_nums()
@@ -331,12 +331,12 @@ class Dataset(object):
if self._total_edge_num is None: if self._total_edge_num is None:
self._total_edge_num = self._get_total_edge_num(all_edge_nums) self._total_edge_num = self._get_total_edge_num(all_edge_nums)
infos['total_edge_num'] = self._total_edge_num infos['total_edge_num'] = self._total_edge_num
if 'ave_edge_num' in keys: if 'ave_edge_num' in keys:
if self._ave_edge_num is None: if self._ave_edge_num is None:
self._ave_edge_num = self._get_ave_edge_num(all_edge_nums) self._ave_edge_num = self._get_ave_edge_num(all_edge_nums)
infos['ave_edge_num'] = self._ave_edge_num infos['ave_edge_num'] = self._ave_edge_num
if 'max_edge_num' in keys: if 'max_edge_num' in keys:
if self._max_edge_num is None: if self._max_edge_num is None:
self._max_edge_num = self._get_max_edge_num(all_edge_nums) self._max_edge_num = self._get_max_edge_num(all_edge_nums)
@@ -346,120 +346,120 @@ class Dataset(object):
if self._min_edge_num is None: if self._min_edge_num is None:
self._min_edge_num = self._get_min_edge_num(all_edge_nums) self._min_edge_num = self._get_min_edge_num(all_edge_nums)
infos['min_edge_num'] = self._min_edge_num infos['min_edge_num'] = self._min_edge_num
# label number # label number
if 'node_label_dim' in keys: if 'node_label_dim' in keys:
if self._node_label_dim is None: if self._node_label_dim is None:
self._node_label_dim = self._get_node_label_dim() self._node_label_dim = self._get_node_label_dim()
infos['node_label_dim'] = self._node_label_dim
infos['node_label_dim'] = self._node_label_dim
if 'node_label_nums' in keys: if 'node_label_nums' in keys:
if self._node_label_nums is None: if self._node_label_nums is None:
self._node_label_nums = {} self._node_label_nums = {}
for node_label in self._node_labels: for node_label in self._node_labels:
self._node_label_nums[node_label] = self._get_node_label_num(node_label) self._node_label_nums[node_label] = self._get_node_label_num(node_label)
infos['node_label_nums'] = self._node_label_nums infos['node_label_nums'] = self._node_label_nums
if 'edge_label_dim' in keys: if 'edge_label_dim' in keys:
if self._edge_label_dim is None: if self._edge_label_dim is None:
self._edge_label_dim = self._get_edge_label_dim() self._edge_label_dim = self._get_edge_label_dim()
infos['edge_label_dim'] = self._edge_label_dim
infos['edge_label_dim'] = self._edge_label_dim
if 'edge_label_nums' in keys: if 'edge_label_nums' in keys:
if self._edge_label_nums is None: if self._edge_label_nums is None:
self._edge_label_nums = {} self._edge_label_nums = {}
for edge_label in self._edge_labels: for edge_label in self._edge_labels:
self._edge_label_nums[edge_label] = self._get_edge_label_num(edge_label) self._edge_label_nums[edge_label] = self._get_edge_label_num(edge_label)
infos['edge_label_nums'] = self._edge_label_nums infos['edge_label_nums'] = self._edge_label_nums
if 'directed' in keys or 'substructures' in keys: if 'directed' in keys or 'substructures' in keys:
if self._directed is None: if self._directed is None:
self._directed = self._is_directed() self._directed = self._is_directed()
infos['directed'] = self._directed infos['directed'] = self._directed
# node degree # node degree
if any(i in keys for i in ['ave_node_degree', 'max_node_degree', 'min_node_degree']): if any(i in keys for i in ['ave_node_degree', 'max_node_degree', 'min_node_degree']):
all_node_degrees = self._get_all_node_degrees() all_node_degrees = self._get_all_node_degrees()
if 'ave_node_degree' in keys: if 'ave_node_degree' in keys:
if self._ave_node_degree is None: if self._ave_node_degree is None:
self._ave_node_degree = self._get_ave_node_degree(all_node_degrees) self._ave_node_degree = self._get_ave_node_degree(all_node_degrees)
infos['ave_node_degree'] = self._ave_node_degree infos['ave_node_degree'] = self._ave_node_degree
if 'max_node_degree' in keys: if 'max_node_degree' in keys:
if self._max_node_degree is None: if self._max_node_degree is None:
self._max_node_degree = self._get_max_node_degree(all_node_degrees) self._max_node_degree = self._get_max_node_degree(all_node_degrees)
infos['max_node_degree'] = self._max_node_degree infos['max_node_degree'] = self._max_node_degree
if 'min_node_degree' in keys: if 'min_node_degree' in keys:
if self._min_node_degree is None: if self._min_node_degree is None:
self._min_node_degree = self._get_min_node_degree(all_node_degrees) self._min_node_degree = self._get_min_node_degree(all_node_degrees)
infos['min_node_degree'] = self._min_node_degree infos['min_node_degree'] = self._min_node_degree
# fill factor # fill factor
if any(i in keys for i in ['ave_fill_factor', 'max_fill_factor', 'min_fill_factor']): if any(i in keys for i in ['ave_fill_factor', 'max_fill_factor', 'min_fill_factor']):
all_fill_factors = self._get_all_fill_factors() all_fill_factors = self._get_all_fill_factors()
if 'ave_fill_factor' in keys: if 'ave_fill_factor' in keys:
if self._ave_fill_factor is None: if self._ave_fill_factor is None:
self._ave_fill_factor = self._get_ave_fill_factor(all_fill_factors) self._ave_fill_factor = self._get_ave_fill_factor(all_fill_factors)
infos['ave_fill_factor'] = self._ave_fill_factor infos['ave_fill_factor'] = self._ave_fill_factor
if 'max_fill_factor' in keys: if 'max_fill_factor' in keys:
if self._max_fill_factor is None: if self._max_fill_factor is None:
self._max_fill_factor = self._get_max_fill_factor(all_fill_factors) self._max_fill_factor = self._get_max_fill_factor(all_fill_factors)
infos['max_fill_factor'] = self._max_fill_factor infos['max_fill_factor'] = self._max_fill_factor
if 'min_fill_factor' in keys: if 'min_fill_factor' in keys:
if self._min_fill_factor is None: if self._min_fill_factor is None:
self._min_fill_factor = self._get_min_fill_factor(all_fill_factors) self._min_fill_factor = self._get_min_fill_factor(all_fill_factors)
infos['min_fill_factor'] = self._min_fill_factor infos['min_fill_factor'] = self._min_fill_factor
if 'substructures' in keys: if 'substructures' in keys:
if self._substructures is None: if self._substructures is None:
self._substructures = self._get_substructures() self._substructures = self._get_substructures()
infos['substructures'] = self._substructures infos['substructures'] = self._substructures
if 'class_number' in keys: if 'class_number' in keys:
if self._class_number is None: if self._class_number is None:
self._class_number = self._get_class_number() self._class_number = self._get_class_number()
infos['class_number'] = self._class_number infos['class_number'] = self._class_number
if 'node_attr_dim' in keys: if 'node_attr_dim' in keys:
if self._node_attr_dim is None: if self._node_attr_dim is None:
self._node_attr_dim = self._get_node_attr_dim() self._node_attr_dim = self._get_node_attr_dim()
infos['node_attr_dim'] = self._node_attr_dim infos['node_attr_dim'] = self._node_attr_dim
if 'edge_attr_dim' in keys: if 'edge_attr_dim' in keys:
if self._edge_attr_dim is None: if self._edge_attr_dim is None:
self._edge_attr_dim = self._get_edge_attr_dim() self._edge_attr_dim = self._get_edge_attr_dim()
infos['edge_attr_dim'] = self._edge_attr_dim infos['edge_attr_dim'] = self._edge_attr_dim
# entropy of degree distribution. # entropy of degree distribution.
if 'all_degree_entropy' in keys: if 'all_degree_entropy' in keys:
if params is not None and ('all_degree_entropy' in params) and ('base' in params['all_degree_entropy']): if params is not None and ('all_degree_entropy' in params) and ('base' in params['all_degree_entropy']):
base = params['all_degree_entropy']['base'] base = params['all_degree_entropy']['base']
else: else:
base = None base = None
infos['all_degree_entropy'] = self._compute_all_degree_entropy(base=base) infos['all_degree_entropy'] = self._compute_all_degree_entropy(base=base)
if 'ave_degree_entropy' in keys: if 'ave_degree_entropy' in keys:
if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']): if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']):
base = params['ave_degree_entropy']['base'] base = params['ave_degree_entropy']['base']
else: else:
base = None base = None
infos['ave_degree_entropy'] = np.mean(self._compute_all_degree_entropy(base=base)) infos['ave_degree_entropy'] = np.mean(self._compute_all_degree_entropy(base=base))
return infos return infos
def print_graph_infos(self, infos): def print_graph_infos(self, infos):
from collections import OrderedDict from collections import OrderedDict
keys = list(infos.keys()) keys = list(infos.keys())
print(OrderedDict(sorted(infos.items(), key=lambda i: keys.index(i[0])))) print(OrderedDict(sorted(infos.items(), key=lambda i: keys.index(i[0]))))
def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
node_labels = [item for item in node_labels if item in self._node_labels] node_labels = [item for item in node_labels if item in self._node_labels]
edge_labels = [item for item in edge_labels if item in self._edge_labels] edge_labels = [item for item in edge_labels if item in self._edge_labels]
@@ -485,8 +485,8 @@ class Dataset(object):
self._node_attrs = [na for na in self._node_attrs if na not in node_attrs] self._node_attrs = [na for na in self._node_attrs if na not in node_attrs]
if len(edge_attrs) > 0: if len(edge_attrs) > 0:
self._edge_attrs = [ea for ea in self._edge_attrs if ea not in edge_attrs] self._edge_attrs = [ea for ea in self._edge_attrs if ea not in edge_attrs]
def clean_labels(self): def clean_labels(self):
labels = [] labels = []
for name in self._node_labels: for name in self._node_labels:
@@ -543,8 +543,8 @@ class Dataset(object):
for ed in G.edges(): for ed in G.edges():
del G.edges[ed][name] del G.edges[ed][name]
self._edge_attrs = labels self._edge_attrs = labels
def cut_graphs(self, range_): def cut_graphs(self, range_):
self._graphs = [self._graphs[i] for i in range_] self._graphs = [self._graphs[i] for i in range_]
if self._targets is not None: if self._targets is not None:
@@ -561,8 +561,8 @@ class Dataset(object):
self._graphs = [p[1] for p in trimed_pairs] self._graphs = [p[1] for p in trimed_pairs]
self._targets = [self._targets[i] for i in idx] self._targets = [self._targets[i] for i in idx]
self.clean_labels() self.clean_labels()
def copy(self): def copy(self):
dataset = Dataset() dataset = Dataset()
graphs = [g.copy() for g in self._graphs] if self._graphs is not None else None graphs = [g.copy() for g in self._graphs] if self._graphs is not None else None
@@ -575,8 +575,8 @@ class Dataset(object):
dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs) dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs)
# @todo: clean_labels and add other class members? # @todo: clean_labels and add other class members?
return dataset return dataset
def get_all_node_labels(self): def get_all_node_labels(self):
node_labels = [] node_labels = []
for g in self._graphs: for g in self._graphs:
@@ -585,8 +585,8 @@ class Dataset(object):
if nl not in node_labels: if nl not in node_labels:
node_labels.append(nl) node_labels.append(nl)
return node_labels return node_labels
def get_all_edge_labels(self): def get_all_edge_labels(self):
edge_labels = [] edge_labels = []
for g in self._graphs: for g in self._graphs:
@@ -595,94 +595,94 @@ class Dataset(object):
if el not in edge_labels: if el not in edge_labels:
edge_labels.append(el) edge_labels.append(el)
return edge_labels return edge_labels
def _get_dataset_size(self): def _get_dataset_size(self):
return len(self._graphs) return len(self._graphs)
def _get_all_node_nums(self): def _get_all_node_nums(self):
return [nx.number_of_nodes(G) for G in self._graphs] return [nx.number_of_nodes(G) for G in self._graphs]
def _get_total_node_nums(self, all_node_nums): def _get_total_node_nums(self, all_node_nums):
return np.sum(all_node_nums) return np.sum(all_node_nums)
def _get_ave_node_num(self, all_node_nums): def _get_ave_node_num(self, all_node_nums):
return np.mean(all_node_nums) return np.mean(all_node_nums)
def _get_min_node_num(self, all_node_nums): def _get_min_node_num(self, all_node_nums):
return np.amin(all_node_nums) return np.amin(all_node_nums)
def _get_max_node_num(self, all_node_nums): def _get_max_node_num(self, all_node_nums):
return np.amax(all_node_nums) return np.amax(all_node_nums)
def _get_all_edge_nums(self): def _get_all_edge_nums(self):
return [nx.number_of_edges(G) for G in self._graphs] return [nx.number_of_edges(G) for G in self._graphs]
def _get_total_edge_nums(self, all_edge_nums): def _get_total_edge_nums(self, all_edge_nums):
return np.sum(all_edge_nums) return np.sum(all_edge_nums)
def _get_ave_edge_num(self, all_edge_nums): def _get_ave_edge_num(self, all_edge_nums):
return np.mean(all_edge_nums) return np.mean(all_edge_nums)
def _get_min_edge_num(self, all_edge_nums): def _get_min_edge_num(self, all_edge_nums):
return np.amin(all_edge_nums) return np.amin(all_edge_nums)
def _get_max_edge_num(self, all_edge_nums): def _get_max_edge_num(self, all_edge_nums):
return np.amax(all_edge_nums) return np.amax(all_edge_nums)
def _get_node_label_dim(self): def _get_node_label_dim(self):
return len(self._node_labels) return len(self._node_labels)
def _get_node_label_num(self, node_label): def _get_node_label_num(self, node_label):
nl = set() nl = set()
for G in self._graphs: for G in self._graphs:
nl = nl | set(nx.get_node_attributes(G, node_label).values()) nl = nl | set(nx.get_node_attributes(G, node_label).values())
return len(nl) return len(nl)
def _get_edge_label_dim(self): def _get_edge_label_dim(self):
return len(self._edge_labels) return len(self._edge_labels)
def _get_edge_label_num(self, edge_label): def _get_edge_label_num(self, edge_label):
el = set() el = set()
for G in self._graphs: for G in self._graphs:
el = el | set(nx.get_edge_attributes(G, edge_label).values()) el = el | set(nx.get_edge_attributes(G, edge_label).values())
return len(el) return len(el)
def _is_directed(self): def _is_directed(self):
return nx.is_directed(self._graphs[0]) return nx.is_directed(self._graphs[0])
def _get_all_node_degrees(self): def _get_all_node_degrees(self):
return [np.mean(list(dict(G.degree()).values())) for G in self._graphs] return [np.mean(list(dict(G.degree()).values())) for G in self._graphs]
def _get_ave_node_degree(self, all_node_degrees): def _get_ave_node_degree(self, all_node_degrees):
return np.mean(all_node_degrees) return np.mean(all_node_degrees)
def _get_max_node_degree(self, all_node_degrees): def _get_max_node_degree(self, all_node_degrees):
return np.amax(all_node_degrees) return np.amax(all_node_degrees)
def _get_min_node_degree(self, all_node_degrees): def _get_min_node_degree(self, all_node_degrees):
return np.amin(all_node_degrees) return np.amin(all_node_degrees)
def _get_all_fill_factors(self): def _get_all_fill_factors(self):
"""Get fill factor, the number of non-zero entries in the adjacency matrix. """Get fill factor, the number of non-zero entries in the adjacency matrix.


@@ -692,20 +692,20 @@ class Dataset(object):
List of fill factors for all graphs. List of fill factors for all graphs.
""" """
return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self._graphs] return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self._graphs]


def _get_ave_fill_factor(self, all_fill_factors): def _get_ave_fill_factor(self, all_fill_factors):
return np.mean(all_fill_factors) return np.mean(all_fill_factors)
def _get_max_fill_factor(self, all_fill_factors): def _get_max_fill_factor(self, all_fill_factors):
return np.amax(all_fill_factors) return np.amax(all_fill_factors)
def _get_min_fill_factor(self, all_fill_factors): def _get_min_fill_factor(self, all_fill_factors):
return np.amin(all_fill_factors) return np.amin(all_fill_factors)
def _get_substructures(self): def _get_substructures(self):
subs = set() subs = set()
for G in self._graphs: for G in self._graphs:
@@ -737,22 +737,22 @@ class Dataset(object):
# if any(len(i) > 2 for i in cyc): # if any(len(i) > 2 for i in cyc):
# subs.add('cyclic') # subs.add('cyclic')
# break # break
return subs return subs
def _get_class_num(self): def _get_class_num(self):
return len(set(self._targets)) return len(set(self._targets))
def _get_node_attr_dim(self): def _get_node_attr_dim(self):
return len(self._node_attrs) return len(self._node_attrs)
def _get_edge_attr_dim(self): def _get_edge_attr_dim(self):
return len(self._edge_attrs) return len(self._edge_attrs)


def _compute_all_degree_entropy(self, base=None): def _compute_all_degree_entropy(self, base=None):
"""Compute the entropy of degree distribution of each graph. """Compute the entropy of degree distribution of each graph.


@@ -767,15 +767,15 @@ class Dataset(object):
The calculated entropy. The calculated entropy.
""" """
from gklearn.utils.stats import entropy from gklearn.utils.stats import entropy
degree_entropy = [] degree_entropy = []
for g in self._graphs: for g in self._graphs:
degrees = list(dict(g.degree()).values()) degrees = list(dict(g.degree()).values())
en = entropy(degrees, base=base) en = entropy(degrees, base=base)
degree_entropy.append(en) degree_entropy.append(en)
return degree_entropy return degree_entropy
@property @property
def graphs(self): def graphs(self):
return self._graphs return self._graphs
@@ -784,8 +784,8 @@ class Dataset(object):
@property @property
def targets(self): def targets(self):
return self._targets return self._targets
@property @property
def node_labels(self): def node_labels(self):
return self._node_labels return self._node_labels
@@ -794,25 +794,25 @@ class Dataset(object):
@property @property
def edge_labels(self): def edge_labels(self):
return self._edge_labels return self._edge_labels
@property @property
def node_attrs(self): def node_attrs(self):
return self._node_attrs return self._node_attrs
@property @property
def edge_attrs(self): def edge_attrs(self):
return self._edge_attrs return self._edge_attrs
def split_dataset_by_target(dataset): def split_dataset_by_target(dataset):
import warnings import warnings
warnings.simplefilter('always', DeprecationWarning) warnings.simplefilter('always', DeprecationWarning)
warnings.warn('This function has been moved to "gklearn.dataset" module. The function "gklearn.utils.dataset.split_dataset_by_target" has not been maintained since Nov 12th, 2020 (version 0.2.1) and will be removed since version 0.4.0.', DeprecationWarning) warnings.warn('This function has been moved to "gklearn.dataset" module. The function "gklearn.utils.dataset.split_dataset_by_target" has not been maintained since Nov 12th, 2020 (version 0.2.1) and will be removed since version 0.4.0.', DeprecationWarning)
from gklearn.preimage.utils import get_same_item_indices from gklearn.preimage.utils import get_same_item_indices
graphs = dataset.graphs graphs = dataset.graphs
targets = dataset.targets targets = dataset.targets
datasets = [] datasets = []


+ 285
- 285
gklearn/utils/graphdataset.py View File

@@ -5,345 +5,345 @@ This file is for old version of graphkit-learn.




def get_dataset_attributes(Gn,
						   target=None,
						   attr_names=None,
						   node_label=None,
						   edge_label=None):
	"""Returns the structure and property information of the graph dataset Gn.

	Parameters
	----------
	Gn : List of NetworkX graph
		List of graphs whose information will be returned.

	target : list
		The list of classification targets corresponding to Gn. Only works for
		classification problems.

	attr_names : list, optional
		List of strings which indicate which information will be returned. If
		None or empty (the default), every attribute below is computed. The
		possible choices include:

		'substructures': sub-structures Gn contains, including 'linear', 'non
			linear' and 'cyclic'.

		'node_labeled': whether vertices have symbolic labels.

		'edge_labeled': whether edges have symbolic labels.

		'is_directed': whether graphs in Gn are directed.

		'dataset_size': number of graphs in Gn.

		'ave_node_num': average number of vertices of graphs in Gn.

		'min_node_num': minimum number of vertices of graphs in Gn.

		'max_node_num': maximum number of vertices of graphs in Gn.

		'ave_edge_num': average number of edges of graphs in Gn.

		'min_edge_num': minimum number of edges of graphs in Gn.

		'max_edge_num': maximum number of edges of graphs in Gn.

		'ave_node_degree': average vertex degree of graphs in Gn.

		'min_node_degree': minimum vertex degree of graphs in Gn.

		'max_node_degree': maximum vertex degree of graphs in Gn.

		'ave_fill_factor': average fill factor (number_of_edges /
			(number_of_nodes ** 2)) of graphs in Gn.

		'min_fill_factor': minimum fill factor of graphs in Gn.

		'max_fill_factor': maximum fill factor of graphs in Gn.

		'node_label_num': number of symbolic vertex labels.

		'edge_label_num': number of symbolic edge labels.

		'node_attr_dim': number of dimensions of non-symbolic vertex labels.
			Extracted from the 'attributes' attribute of graph nodes.

		'edge_attr_dim': number of dimensions of non-symbolic edge labels.
			Extracted from the 'attributes' attribute of graph edges.

		'class_number': number of classes. Only available for classification
			problems.

	node_label : string
		Node attribute used as label. The default node label is atom. Mandatory
		when 'node_labeled' or 'node_label_num' is required.

	edge_label : string
		Edge attribute used as label. The default edge label is bond_type.
		Mandatory when 'edge_labeled' or 'edge_label_num' is required.

	Return
	------
	attrs : OrderedDict
		Value for each requested property, ordered as in attr_names.
	"""
	import networkx as nx
	import numpy as np

	attrs = {}

	# --- helpers, one per computable attribute ---

	def get_dataset_size(Gn):
		return len(Gn)

	def get_all_node_num(Gn):
		return [nx.number_of_nodes(G) for G in Gn]

	def get_ave_node_num(all_node_num):
		return np.mean(all_node_num)

	def get_min_node_num(all_node_num):
		return np.amin(all_node_num)

	def get_max_node_num(all_node_num):
		return np.amax(all_node_num)

	def get_all_edge_num(Gn):
		return [nx.number_of_edges(G) for G in Gn]

	def get_ave_edge_num(all_edge_num):
		return np.mean(all_edge_num)

	def get_min_edge_num(all_edge_num):
		return np.amin(all_edge_num)

	def get_max_edge_num(all_edge_num):
		return np.amax(all_edge_num)

	def is_node_labeled(Gn):
		return False if node_label is None else True

	def get_node_label_num(Gn):
		nl = set()
		for G in Gn:
			nl = nl | set(nx.get_node_attributes(G, node_label).values())
		return len(nl)

	def is_edge_labeled(Gn):
		return False if edge_label is None else True

	def get_edge_label_num(Gn):
		el = set()
		for G in Gn:
			el = el | set(nx.get_edge_attributes(G, edge_label).values())
		return len(el)

	def is_directed(Gn):
		# assumes all graphs in Gn share directedness; only the first is checked.
		return nx.is_directed(Gn[0])

	def get_ave_node_degree(Gn):
		return np.mean([np.mean(list(dict(G.degree()).values())) for G in Gn])

	def get_max_node_degree(Gn):
		return np.amax([np.mean(list(dict(G.degree()).values())) for G in Gn])

	def get_min_node_degree(Gn):
		return np.amin([np.mean(list(dict(G.degree()).values())) for G in Gn])

	# fill factor: the share of non-zero entries in the adjacency matrix.
	def get_ave_fill_factor(Gn):
		return np.mean([nx.number_of_edges(G) / (nx.number_of_nodes(G)
						* nx.number_of_nodes(G)) for G in Gn])

	def get_max_fill_factor(Gn):
		return np.amax([nx.number_of_edges(G) / (nx.number_of_nodes(G)
						* nx.number_of_nodes(G)) for G in Gn])

	def get_min_fill_factor(Gn):
		return np.amin([nx.number_of_edges(G) / (nx.number_of_nodes(G)
						* nx.number_of_nodes(G)) for G in Gn])

	def get_substructures(Gn):
		subs = set()
		for G in Gn:
			degrees = list(dict(G.degree()).values())
			if any(i == 2 for i in degrees):
				subs.add('linear')
			if np.amax(degrees) >= 3:
				subs.add('non linear')
			if 'linear' in subs and 'non linear' in subs:
				break

		# Cycle detection is only attempted for directed graphs; for undirected
		# graphs a robust and efficient check was never implemented upstream.
		if is_directed(Gn):
			for G in Gn:
				if len(list(nx.find_cycle(G))) > 0:
					subs.add('cyclic')
					break

		return subs

	def get_class_num(target):
		return len(set(target))

	def get_node_attr_dim(Gn):
		# dimension of the first 'attributes' vector found; 0 if none exist.
		for G in Gn:
			for n in G.nodes(data=True):
				if 'attributes' in n[1]:
					return len(n[1]['attributes'])
		return 0

	def get_edge_attr_dim(Gn):
		# dimension of the first 'attributes' vector found; 0 if none exist.
		for G in Gn:
			if nx.number_of_edges(G) > 0:
				for e in G.edges(data=True):
					if 'attributes' in e[2]:
						return len(e[2]['attributes'])
		return 0

	# Fix for the former mutable-default-argument (attr_names=[]): the full
	# attribute list is built locally. `not attr_names` also accepts None,
	# which previously raised TypeError; passing [] behaves as before.
	if not attr_names:
		attr_names = [
			'substructures',
			'node_labeled',
			'edge_labeled',
			'is_directed',
			'dataset_size',
			'ave_node_num',
			'min_node_num',
			'max_node_num',
			'ave_edge_num',
			'min_edge_num',
			'max_edge_num',
			'ave_node_degree',
			'min_node_degree',
			'max_node_degree',
			'ave_fill_factor',
			'min_fill_factor',
			'max_fill_factor',
			'node_label_num',
			'edge_label_num',
			'node_attr_dim',
			'edge_attr_dim',
			'class_number',
		]

	# dataset size
	if 'dataset_size' in attr_names:
		attrs.update({'dataset_size': get_dataset_size(Gn)})

	# graph node number (node counts are computed once and shared)
	if any(i in attr_names
		   for i in ['ave_node_num', 'min_node_num', 'max_node_num']):
		all_node_num = get_all_node_num(Gn)

	if 'ave_node_num' in attr_names:
		attrs.update({'ave_node_num': get_ave_node_num(all_node_num)})

	if 'min_node_num' in attr_names:
		attrs.update({'min_node_num': get_min_node_num(all_node_num)})

	if 'max_node_num' in attr_names:
		attrs.update({'max_node_num': get_max_node_num(all_node_num)})

	# graph edge number (edge counts are computed once and shared)
	if any(i in attr_names for i in
		   ['ave_edge_num', 'min_edge_num', 'max_edge_num']):
		all_edge_num = get_all_edge_num(Gn)

	if 'ave_edge_num' in attr_names:
		attrs.update({'ave_edge_num': get_ave_edge_num(all_edge_num)})

	if 'max_edge_num' in attr_names:
		attrs.update({'max_edge_num': get_max_edge_num(all_edge_num)})

	if 'min_edge_num' in attr_names:
		attrs.update({'min_edge_num': get_min_edge_num(all_edge_num)})

	# label number
	if any(i in attr_names for i in ['node_labeled', 'node_label_num']):
		is_nl = is_node_labeled(Gn)
		node_label_num = get_node_label_num(Gn)

	if 'node_labeled' in attr_names:
		# graphs are considered node unlabeled if all nodes have the same label.
		attrs.update({'node_labeled': is_nl if node_label_num > 1 else False})

	if 'node_label_num' in attr_names:
		attrs.update({'node_label_num': node_label_num})

	if any(i in attr_names for i in ['edge_labeled', 'edge_label_num']):
		is_el = is_edge_labeled(Gn)
		edge_label_num = get_edge_label_num(Gn)

	if 'edge_labeled' in attr_names:
		# graphs are considered edge unlabeled if all edges have the same label.
		attrs.update({'edge_labeled': is_el if edge_label_num > 1 else False})

	if 'edge_label_num' in attr_names:
		attrs.update({'edge_label_num': edge_label_num})

	if 'is_directed' in attr_names:
		attrs.update({'is_directed': is_directed(Gn)})

	if 'ave_node_degree' in attr_names:
		attrs.update({'ave_node_degree': get_ave_node_degree(Gn)})

	if 'max_node_degree' in attr_names:
		attrs.update({'max_node_degree': get_max_node_degree(Gn)})

	if 'min_node_degree' in attr_names:
		attrs.update({'min_node_degree': get_min_node_degree(Gn)})

	if 'ave_fill_factor' in attr_names:
		attrs.update({'ave_fill_factor': get_ave_fill_factor(Gn)})

	if 'max_fill_factor' in attr_names:
		attrs.update({'max_fill_factor': get_max_fill_factor(Gn)})

	if 'min_fill_factor' in attr_names:
		attrs.update({'min_fill_factor': get_min_fill_factor(Gn)})

	if 'substructures' in attr_names:
		attrs.update({'substructures': get_substructures(Gn)})

	if 'class_number' in attr_names:
		attrs.update({'class_number': get_class_num(target)})

	if 'node_attr_dim' in attr_names:
		attrs['node_attr_dim'] = get_node_attr_dim(Gn)

	if 'edge_attr_dim' in attr_names:
		attrs['edge_attr_dim'] = get_edge_attr_dim(Gn)

	from collections import OrderedDict
	return OrderedDict(
		sorted(attrs.items(), key=lambda i: attr_names.index(i[0])))




def load_predefined_dataset(ds_name): def load_predefined_dataset(ds_name):
import os import os
from gklearn.utils.graphfiles import loadDataset from gklearn.utils.graphfiles import loadDataset
current_path = os.path.dirname(os.path.realpath(__file__)) + '/' current_path = os.path.dirname(os.path.realpath(__file__)) + '/'
if ds_name == 'Acyclic': if ds_name == 'Acyclic':
ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds' ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds'
@@ -415,5 +415,5 @@ def load_predefined_dataset(ds_name):
pass pass
else: else:
raise Exception('The dataset name "', ds_name, '" is not pre-defined.') raise Exception('The dataset name "', ds_name, '" is not pre-defined.')
return graphs, targets return graphs, targets

+ 4
- 4
gklearn/utils/kernels.py View File

@@ -18,8 +18,8 @@ def deltakernel(x, y):


References References
---------- ----------
[1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between
labeled graphs. In Proceedings of the 20th International Conference on
[1] H. Kashima, K. Tsuda, and A. Inokuchi. Marginalized kernels between
labeled graphs. In Proceedings of the 20th International Conference on
Machine Learning, Washington, DC, United States, 2003. Machine Learning, Washington, DC, United States, 2003.
""" """
return x == y #(1 if condition else 0) return x == y #(1 if condition else 0)
@@ -68,7 +68,7 @@ def polynomialkernel(x, y, d=1, c=0):
x, y : array x, y : array


d : integer, default 1 d : integer, default 1
c : float, default 0 c : float, default 0


Returns Returns
@@ -89,7 +89,7 @@ def linearkernel(x, y):
x, y : array x, y : array


d : integer, default 1 d : integer, default 1
c : float, default 0 c : float, default 0


Returns Returns


Loading…
Cancel
Save