diff --git a/gklearn/dataset/__init__.py b/gklearn/dataset/__init__.py
index f92bd74..8671181 100644
--- a/gklearn/dataset/__init__.py
+++ b/gklearn/dataset/__init__.py
@@ -16,4 +16,6 @@ __date__ = "October 2020"
 from gklearn.dataset.metadata import DATABASES, DATASET_META
 from gklearn.dataset.metadata import GREYC_META, IAM_META, TUDataset_META
 from gklearn.dataset.metadata import list_of_databases, list_of_datasets
-from gklearn.dataset.data_fetcher import DataFetcher
\ No newline at end of file
+from gklearn.dataset.data_fetcher import DataFetcher
+from gklearn.dataset.graph_synthesizer import GraphSynthesizer
+from gklearn.dataset.dataset import Dataset, split_dataset_by_target
\ No newline at end of file
diff --git a/gklearn/dataset/graph_synthesizer.py b/gklearn/dataset/graph_synthesizer.py
new file mode 100644
index 0000000..73c5e6e
--- /dev/null
+++ b/gklearn/dataset/graph_synthesizer.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 11 18:10:06 2020
+
+@author: ljia
+"""
+import numpy as np
+import networkx as nx
+import random
+
+
+class GraphSynthesizer(object):
+	"""Generate synthetic random graphs as ``networkx.Graph`` objects."""
+
+
+	def __init__(self, g_type=None, *args, **kwargs):
+		"""Optionally generate graphs on construction.
+
+		If ``g_type`` is ``'unified'``, the remaining positional and keyword
+		arguments are forwarded to :meth:`unified_graphs`; otherwise no graphs
+		are generated and :attr:`graphs` is ``None``.
+		"""
+		if g_type == 'unified':
+			# Keyword arguments must be unpacked with ``**``; a single ``*`` would
+			# pass the dictionary keys as extra positional arguments.
+			self._graphs = self.unified_graphs(*args, **kwargs)
+		else:
+			self._graphs = None
+
+
+	def random_graph(self, num_nodes, num_edges, num_node_labels=0, num_edge_labels=0, seed=None, directed=False, max_num_edges=None, all_edges=None):
+		"""Create one undirected random graph.
+
+		Nodes are named ``'0'`` ... ``str(num_nodes - 1)``. ``num_edges`` distinct
+		edges are sampled without replacement from ``all_edges``. When
+		``num_node_labels`` / ``num_edge_labels`` is positive, random integer
+		labels are stored under the ``atom`` / ``bond_type`` attributes.
+
+		``all_edges`` defaults to every unordered node pair and ``max_num_edges``
+		to ``len(all_edges)``. ``seed`` and ``directed`` are accepted but not
+		used yet.
+		"""
+		if all_edges is None:
+			all_edges = [(i, j) for i in range(0, num_nodes) for j in range(i + 1, num_nodes)]
+		if max_num_edges is None:
+			max_num_edges = len(all_edges)
+
+		g = nx.Graph()
+		if num_node_labels > 0:
+			node_labels = np.random.randint(0, high=num_node_labels, size=num_nodes)
+			for i in range(0, num_nodes):
+				g.add_node(str(i), atom=node_labels[i]) # @todo: update "atom".
+		else:
+			for i in range(0, num_nodes):
+				g.add_node(str(i))
+
+		if num_edge_labels > 0:
+			edge_labels = np.random.randint(0, high=num_edge_labels, size=num_edges)
+			for idx, i in enumerate(random.sample(range(0, max_num_edges), num_edges)):
+				node1, node2 = all_edges[i]
+				g.add_edge(str(node1), str(node2), bond_type=edge_labels[idx]) # @todo: update "bond_type".
+		else:
+			for i in random.sample(range(0, max_num_edges), num_edges):
+				node1, node2 = all_edges[i]
+				g.add_edge(str(node1), str(node2))
+
+		return g
+
+
+	def unified_graphs(self, num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False):
+		"""Create ``num_graphs`` random graphs that share node and edge counts.
+
+		Raises an exception if ``num_edges`` exceeds the number of distinct
+		undirected node pairs, ``num_nodes * (num_nodes - 1) / 2``.
+		"""
+		max_num_edges = int((num_nodes - 1) * num_nodes / 2)
+		if num_edges > max_num_edges:
+			raise Exception('Too many edges.')
+		all_edges = [(i, j) for i in range(0, num_nodes) for j in range(i + 1, num_nodes)] # @todo: optimize. No directed graphs.
+
+		graphs = []
+		for idx in range(0, num_graphs):
+			graphs.append(self.random_graph(num_nodes, num_edges, num_node_labels=num_node_labels, num_edge_labels=num_edge_labels, seed=seed, directed=directed, max_num_edges=max_num_edges, all_edges=all_edges))
+
+		return graphs
+
+
+	@property
+	def graphs(self):
+		"""The graphs generated at construction, or ``None``."""
+		return self._graphs
\ No newline at end of file
diff --git a/gklearn/utils/graph_synthesizer.py b/gklearn/utils/graph_synthesizer.py
index 2c5f650..7e83225 100644
--- a/gklearn/utils/graph_synthesizer.py
+++ b/gklearn/utils/graph_synthesizer.py
@@ -13,6 +13,11 @@ import random
 class GraphSynthesizer(object):
 
 
+	import warnings
+	warnings.simplefilter('always', DeprecationWarning)
+	warnings.warn('This class has been moved to "gklearn.dataset" module. The class "gklearn.utils.graph_synthesizer.GraphSynthesizer" has not been maintained since Nov 12th, 2020 (version 0.2.1) and will be removed since version 0.2.2.', DeprecationWarning)
+
+
 	def __init__(self):
 		pass
 