From 3da2fa9f4718e540fe6a0fd009ab4ec4cbe92a73 Mon Sep 17 00:00:00 2001 From: linlin Date: Tue, 6 Oct 2020 17:31:01 +0200 Subject: [PATCH] New translations dataset.py (French) --- lang/fr/gklearn/utils/dataset.py | 59 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/lang/fr/gklearn/utils/dataset.py b/lang/fr/gklearn/utils/dataset.py index 7201a0d..3d68212 100644 --- a/lang/fr/gklearn/utils/dataset.py +++ b/lang/fr/gklearn/utils/dataset.py @@ -13,6 +13,7 @@ import os class Dataset(object): + def __init__(self, filename=None, filename_targets=None, **kwargs): if filename is None: self.__graphs = None @@ -180,13 +181,13 @@ class Dataset(object): # return 0 - def get_dataset_infos(self, keys=None): + def get_dataset_infos(self, keys=None, params=None): """Computes and returns the structure and property information of the graph dataset. Parameters ---------- - keys : list - List of strings which indicate which informations will be returned. The + keys : list, optional + A list of strings which indicate which informations will be returned. The possible choices includes: 'substructures': sub-structures graphs contains, including 'linear', 'non @@ -241,7 +242,15 @@ class Dataset(object): 'class_number': number of classes. Only available for classification problems. + 'all_degree_entropy': the entropy of degree distribution of each graph. + + 'ave_degree_entropy': the average entropy of degree distribution of all graphs. + All informations above will be returned if `keys` is not given. + + params : dict of dict, optional + A dictionary which contains extra parameters for each possible + element in ``keys``. 
Return ------ @@ -276,6 +285,8 @@ class Dataset(object): 'node_attr_dim', 'edge_attr_dim', 'class_number', + 'all_degree_entropy', + 'ave_degree_entropy' ] # dataset size @@ -420,6 +431,22 @@ class Dataset(object): self.__edge_attr_dim = self.__get_edge_attr_dim() infos['edge_attr_dim'] = self.__edge_attr_dim + # entropy of degree distribution. + + if 'all_degree_entropy' in keys: + if params is not None and ('all_degree_entropy' in params) and ('base' in params['all_degree_entropy']): + base = params['all_degree_entropy']['base'] + else: + base = None + infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base) + + if 'ave_degree_entropy' in keys: + if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']): + base = params['ave_degree_entropy']['base'] + else: + base = None + infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base)) + return infos @@ -653,8 +680,7 @@ class Dataset(object): def __get_all_fill_factors(self): - """ - Get fill factor, the number of non-zero entries in the adjacency matrix. + """Get fill factor, the number of non-zero entries in the adjacency matrix. Returns ------- @@ -721,7 +747,30 @@ class Dataset(object): def __get_edge_attr_dim(self): return len(self.__edge_attrs) + + def __compute_all_degree_entropy(self, base=None): + """Compute the entropy of degree distribution of each graph. + + Parameters + ---------- + base : float, optional + The logarithmic base to use. The default is ``e`` (natural logarithm). + + Returns + ------- + degree_entropy : list + The calculated entropy of each graph. + """ + from gklearn.utils.stats import entropy + + degree_entropy = [] + for g in self.__graphs: + degrees = list(dict(g.degree()).values()) + en = entropy(degrees, base=base) + degree_entropy.append(en) + return degree_entropy + @property def graphs(self):