|
|
@@ -13,6 +13,7 @@ import os |
|
|
|
|
|
|
|
class Dataset(object): |
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, filename=None, filename_targets=None, **kwargs): |
|
|
|
if filename is None: |
|
|
|
self.__graphs = None |
|
|
@@ -180,13 +181,13 @@ class Dataset(object): |
|
|
|
# return 0 |
|
|
|
|
|
|
|
|
|
|
|
def get_dataset_infos(self, keys=None): |
|
|
|
def get_dataset_infos(self, keys=None, params=None): |
|
|
|
"""Computes and returns the structure and property information of the graph dataset. |
|
|
|
|
|
|
|
Parameters |
|
|
|
---------- |
|
|
|
keys : list |
|
|
|
List of strings which indicate which informations will be returned. The |
|
|
|
keys : list, optional |
|
|
|
A list of strings which indicate which informations will be returned. The |
|
|
|
possible choices includes: |
|
|
|
|
|
|
|
'substructures': sub-structures graphs contains, including 'linear', 'non |
|
|
@@ -241,7 +242,15 @@ class Dataset(object): |
|
|
|
|
|
|
|
'class_number': number of classes. Only available for classification problems. |
|
|
|
|
|
|
|
'all_degree_entropy': the entropy of degree distribution of each graph. |
|
|
|
|
|
|
|
'ave_degree_entropy': the average entropy of degree distribution of all graphs. |
|
|
|
|
|
|
|
All informations above will be returned if `keys` is not given. |
|
|
|
|
|
|
|
params: dict of dict, optional |
|
|
|
A dictionary which contains extra parameters for each possible |
|
|
|
element in ``keys``. |
|
|
|
|
|
|
|
Return |
|
|
|
------ |
|
|
@@ -276,6 +285,8 @@ class Dataset(object): |
|
|
|
'node_attr_dim', |
|
|
|
'edge_attr_dim', |
|
|
|
'class_number', |
|
|
|
'all_degree_entropy', |
|
|
|
'ave_degree_entropy' |
|
|
|
] |
|
|
|
|
|
|
|
# dataset size |
|
|
@@ -420,6 +431,22 @@ class Dataset(object): |
|
|
|
self.__edge_attr_dim = self.__get_edge_attr_dim() |
|
|
|
infos['edge_attr_dim'] = self.__edge_attr_dim |
|
|
|
|
|
|
|
# entropy of degree distribution. |
|
|
|
|
|
|
|
if 'all_degree_entropy' in keys: |
|
|
|
if params is not None and ('all_degree_entropy' in params) and ('base' in params['all_degree_entropy']): |
|
|
|
base = params['all_degree_entropy']['base'] |
|
|
|
else: |
|
|
|
base = None |
|
|
|
infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base) |
|
|
|
|
|
|
|
if 'ave_degree_entropy' in keys: |
|
|
|
if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']): |
|
|
|
base = params['ave_degree_entropy']['base'] |
|
|
|
else: |
|
|
|
base = None |
|
|
|
infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base)) |
|
|
|
|
|
|
|
return infos |
|
|
|
|
|
|
|
|
|
|
@@ -653,8 +680,7 @@ class Dataset(object): |
|
|
|
|
|
|
|
|
|
|
|
def __get_all_fill_factors(self): |
|
|
|
""" |
|
|
|
Get fill factor, the number of non-zero entries in the adjacency matrix. |
|
|
|
"""Get fill factor, the number of non-zero entries in the adjacency matrix. |
|
|
|
|
|
|
|
Returns |
|
|
|
------- |
|
|
@@ -721,7 +747,30 @@ class Dataset(object): |
|
|
|
|
|
|
|
def __get_edge_attr_dim(self): |
|
|
|
return len(self.__edge_attrs) |
|
|
|
|
|
|
|
|
|
|
|
def __compute_all_degree_entropy(self, base=None):
    """Compute the entropy of the degree distribution of every graph.

    Parameters
    ----------
    base : float, optional
        The logarithmic base, forwarded unchanged to
        ``gklearn.utils.stats.entropy``. When left as ``None`` the helper's
        own default applies (documented upstream as ``e``, the natural
        logarithm).

    Returns
    -------
    list
        One entropy value per graph, in the iteration order of the
        graphs stored in this dataset.
    """
    # Imported at call time, exactly as the original does.
    from gklearn.utils.stats import entropy

    # For each graph: collect the degree of every node, then hand the
    # resulting list of degrees to the entropy helper.
    return [
        entropy(list(dict(graph.degree()).values()), base=base)
        for graph in self.__graphs
    ]
|
|
|
|
|
|
|
|
|
|
|
@property |
|
|
|
def graphs(self): |
|
|
|