From 052356d39ab0a2dfc4da0433ee5e8e2c8ca1d2b6 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 6 Nov 2020 18:19:06 +0100 Subject: [PATCH] Add datasets metadata. --- gklearn/dataset/__init__.py | 19 + gklearn/dataset/metadata.py | 2485 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2504 insertions(+) create mode 100644 gklearn/dataset/__init__.py create mode 100644 gklearn/dataset/metadata.py diff --git a/gklearn/dataset/__init__.py b/gklearn/dataset/__init__.py new file mode 100644 index 0000000..f92bd74 --- /dev/null +++ b/gklearn/dataset/__init__.py @@ -0,0 +1,19 @@ +# -*-coding:utf-8 -*- +"""gklearn - datasets module + +Implement some methods to manage graph datasets + data_fetcher.py : fetch graph datasets from the Internet. + + +""" + +# info +__version__ = "0.2" +__author__ = "Linlin Jia" +__date__ = "October 2020" + + +from gklearn.dataset.metadata import DATABASES, DATASET_META +from gklearn.dataset.metadata import GREYC_META, IAM_META, TUDataset_META +from gklearn.dataset.metadata import list_of_databases, list_of_datasets +from gklearn.dataset.data_fetcher import DataFetcher \ No newline at end of file diff --git a/gklearn/dataset/metadata.py b/gklearn/dataset/metadata.py new file mode 100644 index 0000000..4fa48d9 --- /dev/null +++ b/gklearn/dataset/metadata.py @@ -0,0 +1,2485 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Oct 20 14:25:49 2020 + +@author: ljia +""" + +DATABASES = { + 'greyc': 'https://brunl01.users.greyc.fr/CHEMISTRY/', + 'iam': 'https://iapr-tc15.greyc.fr/IAM/', + 'tudataset': 'http://graphlearning.io/docs/datasets/', + } + + +### -------- database greyc -------- ### +GREYC_META = { + 'ACE': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 32, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 52, + 'ave_edge_num': None, + 'node_labeled': None, + 'edge_labeled': None, + 'node_attr_dim': None, + 'geometry': None, + 'edge_attr_dim': None, + 'url': 
'https://brunl01.users.greyc.fr/CHEMISTRY/ACEDataset.tar', + 'domain': 'small molecules', + 'train_valid_test': [], + 'stereoisomerism': True, + 'load_files': [], + }, + 'Acyclic': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 183, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 8.15, + 'ave_edge_num': 7.15, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://brunl01.users.greyc.fr/CHEMISTRY/Acyclic.tar.gz', + 'domain': 'small molecules', + 'train_valid_test': None, + 'stereoisomerism': False, + 'load_files': ['dataset_bps.ds'], + }, + 'AIDS_greyc': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 2000, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 15.69, + 'ave_edge_num': 16.20, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': None, + 'geometry': None, + 'edge_attr_dim': None, + 'url': 'https://iapr-tc15.greyc.fr/IAM/AIDS.zip', + 'domain': 'small molecules', + 'train_valid_test': ['data/train.cxl', 'data/valid.cxl', 'data/test.cxl',], + 'stereoisomerism': False, + 'load_files': ['data/'], + }, + 'Alkane': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 150, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 8.87, + 'ave_edge_num': 7.87, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://brunl01.users.greyc.fr/CHEMISTRY/alkane_dataset.tar.gz', + 'domain': 'small molecules', + 'train_valid_test': None, + 'stereoisomerism': False, + 'load_files': ['dataset.ds', 'dataset_boiling_point_names.txt'], + }, + 'Chiral': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 35, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 21.29, + 'ave_edge_num': None, + 'node_labeled': None, + 'edge_labeled': None, + 'node_attr_dim': None, + 'geometry': None, + 
'edge_attr_dim': None, + 'url': 'https://brunl01.users.greyc.fr/CHEMISTRY/DatasetAcyclicChiral.tar', + 'domain': 'small molecules', + 'train_valid_test': [], + 'stereoisomerism': True, + 'load_files': [], + }, + 'MAO': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 68, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 18.38, + 'ave_edge_num': 19.63, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://brunl01.users.greyc.fr/CHEMISTRY/mao.tgz', + 'domain': 'small molecules', + 'train_valid_test': None, + 'stereoisomerism': False, + 'load_files': ['dataset.ds'], + }, + 'Monoterpenoides': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 382, + 'class_number': 10, + 'task_type': 'classification', + 'ave_node_num': 10, + 'ave_edge_num': None, + 'node_labeled': None, + 'edge_labeled': None, + 'node_attr_dim': None, + 'geometry': None, + 'edge_attr_dim': None, + 'url': 'https://brunl01.users.greyc.fr/CHEMISTRY/monoterpenoides.tar.gz', + 'domain': 'small molecules', + 'train_valid_test': None, + 'stereoisomerism': False, + 'load_files': ['dataset_10+.ds'], + }, + 'PAH': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 94, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 20.7, + 'ave_edge_num': 24.43, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://brunl01.users.greyc.fr/CHEMISTRY/PAH.tar.gz', + 'domain': 'small molecules', + 'train_valid_test': ['trainset_0.ds', None, 'testset_0.ds'], + 'stereoisomerism': False, + 'load_files': [], + }, + 'PTC': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 416, + 'class_number': None, + 'task_type': 'classification', + 'ave_node_num': 14.4, + 'ave_edge_num': None, + 'node_labeled': None, + 'edge_labeled': None, + 'node_attr_dim': None, + 'geometry': None, + 
'edge_attr_dim': None, + 'url': 'https://brunl01.users.greyc.fr/CHEMISTRY/ptc.tgz', + 'domain': 'small molecules', + 'train_valid_test': None, + 'stereoisomerism': False, + 'load_files': [], + 'extra_info': 'This dataset has test and train datasets. Select gender between mm, fm, mr, fr. \ndataloader = DataLoader(\'Ptc\',root = ..., option = \'mm\') \ntest,train = dataloader.dataset \nGs_test, y_test = test \nGs_train_, y_train = train', + }, + 'Steroid': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 64, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 75.11, + 'ave_edge_num': None, + 'node_labeled': None, + 'edge_labeled': None, + 'node_attr_dim': None, + 'geometry': None, + 'edge_attr_dim': None, + 'url': 'https://brunl01.users.greyc.fr/CHEMISTRY/SteroidDataset.tar', + 'domain': 'small molecules', + 'train_valid_test': ['trainset_0.ds', None, 'testset_0.ds'], + 'stereoisomerism': False, + 'load_files': [], + }, + 'Vitamin_D': { + 'database': 'greyc', + 'reference': None, + 'dataset_size': 69, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 76.91, + 'ave_edge_num': None, + 'node_labeled': None, + 'edge_labeled': None, + 'node_attr_dim': None, + 'geometry': None, + 'edge_attr_dim': None, + 'url': 'https://brunl01.users.greyc.fr/CHEMISTRY/DatasetVitamin.tar', + 'domain': 'small molecules', + 'train_valid_test': [], + 'stereoisomerism': True, + 'load_files': [], + }, +} + + +### -------- database iam -------- ### +# @todo: several datasets in this database are included in TUDataset. However they do not include train/valid/test sets. 
+IAM_META = { + 'GREC': { + 'database': 'iam', + 'reference': None, + 'dataset_size': None, + 'class_number': None, + 'task_type': None, + 'ave_node_num': None, + 'ave_edge_num': None, + 'node_labeled': None, + 'edge_labeled': None, + 'node_attr_dim': None, + 'geometry': None, + 'edge_attr_dim': None, + 'url': 'https://iapr-tc15.greyc.fr/IAM/GREC.zip', + 'domain': None, + 'train_valid_test': ['data/test.cxl','data/train.cxl', 'data/valid.cxl'], + 'load_files': [], + }, + 'Web': { + 'database': 'iam', + 'reference': None, + 'dataset_size': None, + 'class_number': None, + 'task_type': None, + 'ave_node_num': None, + 'ave_edge_num': None, + 'node_labeled': None, + 'edge_labeled': None, + 'node_attr_dim': None, + 'geometry': None, + 'edge_attr_dim': None, + 'url': 'https://iapr-tc15.greyc.fr/IAM/Web.zip', + 'domain': None, + 'train_valid_test': ['data/test.cxl', 'data/train.cxl', 'data/valid.cxl'], + 'load_files': [], + }, +} + + +### -------- database tudataset -------- ### +TUDataset_META = { + ### small molecules + 'AIDS': { + 'database': 'tudataset', + 'reference': '[16,17]', + 'dataset_size': 2000, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 15.69, + 'ave_edge_num': 16.2, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 4, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/AIDS.zip', + 'domain': 'small molecules', + }, + 'alchemy_full': { + 'database': 'tudataset', + 'reference': '[29]', + 'dataset_size': 202579, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 10.1, + 'ave_edge_num': 10.44, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 3, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/alchemy_full.zip', + 'domain': 'small molecules', + }, + 'aspirin': { + 'database': 'tudataset', + 'reference': '[36]', + 'dataset_size': 111763, + 'class_number': None, + 'task_type': 
'regression', + 'ave_node_num': 21.0, + 'ave_edge_num': 151.52, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 6, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/aspirin.zip', + 'domain': 'small molecules', + }, + 'benzene': { + 'database': 'tudataset', + 'reference': '[36]', + 'dataset_size': 527984, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 12.0, + 'ave_edge_num': 64.94, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 6, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/benzene.zip', + 'domain': 'small molecules', + }, + 'BZR': { + 'database': 'tudataset', + 'reference': '[7]', + 'dataset_size': 405, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 35.75, + 'ave_edge_num': 38.36, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 3, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/BZR.zip', + 'domain': 'small molecules', + }, + 'BZR_MD': { + 'database': 'tudataset', + 'reference': '[7,23]', + 'dataset_size': 306, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 21.3, + 'ave_edge_num': 225.06, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 1, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/BZR_MD.zip', + 'domain': 'small molecules', + }, + 'COX2': { + 'database': 'tudataset', + 'reference': '[7]', + 'dataset_size': 467, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 41.22, + 'ave_edge_num': 43.45, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 3, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/COX2.zip', + 'domain': 'small molecules', + }, + 'COX2_MD': { + 'database': 'tudataset', + 'reference': '[7,23]', + 
'dataset_size': 303, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.28, + 'ave_edge_num': 335.12, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 1, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/COX2_MD.zip', + 'domain': 'small molecules', + }, + 'DHFR': { + 'database': 'tudataset', + 'reference': '[7]', + 'dataset_size': 467, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 42.43, + 'ave_edge_num': 44.54, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 3, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/DHFR.zip', + 'domain': 'small molecules', + }, + 'DHFR_MD': { + 'database': 'tudataset', + 'reference': '[7,23]', + 'dataset_size': 393, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 23.87, + 'ave_edge_num': 283.01, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 1, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/DHFR_MD.zip', + 'domain': 'small molecules', + }, + 'ER_MD': { + 'database': 'tudataset', + 'reference': '[7,23]', + 'dataset_size': 446, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 21.33, + 'ave_edge_num': 234.85, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 1, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/ER_MD.zip', + 'domain': 'small molecules', + }, + 'ethanol': { + 'database': 'tudataset', + 'reference': '[36]', + 'dataset_size': 455093, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 9.0, + 'ave_edge_num': 36.0, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 6, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/ethanol.zip', + 'domain': 'small molecules', + }, + 
'FRANKENSTEIN': { + 'database': 'tudataset', + 'reference': '[15]', + 'dataset_size': 4337, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 16.9, + 'ave_edge_num': 17.88, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 780, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/FRANKENSTEIN.zip', + 'domain': 'small molecules', + }, + 'malonaldehyde': { + 'database': 'tudataset', + 'reference': '[36]', + 'dataset_size': 893238, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 9.0, + 'ave_edge_num': 36.0, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 6, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/malonaldehyde.zip', + 'domain': 'small molecules', + }, + 'MCF-7': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 27770, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.39, + 'ave_edge_num': 28.52, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/MCF-7.zip', + 'domain': 'small molecules', + }, + 'MCF-7H': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 27770, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 47.3, + 'ave_edge_num': 49.43, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/MCF-7H.zip', + 'domain': 'small molecules', + }, + 'MOLT-4': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 39765, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.09, + 'ave_edge_num': 28.13, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 
'https://www.chrsmrrs.com/graphkerneldatasets/MOLT-4.zip', + 'domain': 'small molecules', + }, + 'MOLT-4H': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 39765, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 46.7, + 'ave_edge_num': 48.73, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/MOLT-4H.zip', + 'domain': 'small molecules', + }, + 'Mutagenicity': { + 'database': 'tudataset', + 'reference': '[16,20]', + 'dataset_size': 4337, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 30.32, + 'ave_edge_num': 30.77, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Mutagenicity.zip', + 'domain': 'small molecules', + }, + 'MUTAG': { + 'database': 'tudataset', + 'reference': '[1,23]', + 'dataset_size': 188, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.93, + 'ave_edge_num': 19.79, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip', + 'domain': 'small molecules', + }, + 'naphthalene': { + 'database': 'tudataset', + 'reference': '[36]', + 'dataset_size': 226256, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 18.0, + 'ave_edge_num': 127.37, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 6, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/naphthalene.zip', + 'domain': 'small molecules', + }, + 'NCI1': { + 'database': 'tudataset', + 'reference': '[8,9,22]', + 'dataset_size': 4110, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 29.87, + 'ave_edge_num': 32.3, + 'node_labeled': True, + 'edge_labeled': 
False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/NCI1.zip', + 'domain': 'small molecules', + }, + 'NCI109': { + 'database': 'tudataset', + 'reference': '[8,9,22]', + 'dataset_size': 4127, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 29.68, + 'ave_edge_num': 32.13, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/NCI109.zip', + 'domain': 'small molecules', + }, + 'NCI-H23': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 40353, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.07, + 'ave_edge_num': 28.1, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/NCI-H23.zip', + 'domain': 'small molecules', + }, + 'NCI-H23H': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 40353, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 46.67, + 'ave_edge_num': 48.69, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/NCI-H23H.zip', + 'domain': 'small molecules', + }, + 'OVCAR-8': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 40516, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.07, + 'ave_edge_num': 28.1, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/OVCAR-8.zip', + 'domain': 'small molecules', + }, + 'OVCAR-8H': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 40516, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 46.67, 
+ 'ave_edge_num': 48.7, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/OVCAR-8H.zip', + 'domain': 'small molecules', + }, + 'P388': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 41472, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 22.11, + 'ave_edge_num': 23.55, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/P388.zip', + 'domain': 'small molecules', + }, + 'P388H': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 41472, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 40.44, + 'ave_edge_num': 41.88, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/P388H.zip', + 'domain': 'small molecules', + }, + 'PC-3': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 27509, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.35, + 'ave_edge_num': 28.49, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/PC-3.zip', + 'domain': 'small molecules', + }, + 'PC-3H': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 27509, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 47.19, + 'ave_edge_num': 49.32, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/PC-3H.zip', + 'domain': 'small molecules', + }, + 'PTC_FM': { + 'database': 'tudataset', + 'reference': '[2,23]', + 'dataset_size': 349, + 'class_number': 2, + 'task_type': 
'classification', + 'ave_node_num': 14.11, + 'ave_edge_num': 14.48, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/PTC_FM.zip', + 'domain': 'small molecules', + }, + 'PTC_FR': { + 'database': 'tudataset', + 'reference': '[2,23]', + 'dataset_size': 351, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 14.56, + 'ave_edge_num': 15.0, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/PTC_FR.zip', + 'domain': 'small molecules', + }, + 'PTC_MM': { + 'database': 'tudataset', + 'reference': '[2,23]', + 'dataset_size': 336, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 13.97, + 'ave_edge_num': 14.32, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/PTC_MM.zip', + 'domain': 'small molecules', + }, + 'PTC_MR': { + 'database': 'tudataset', + 'reference': '[2,23]', + 'dataset_size': 344, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 14.29, + 'ave_edge_num': 14.69, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/PTC_MR.zip', + 'domain': 'small molecules', + }, + 'QM9': { + 'database': 'tudataset', + 'reference': '[33,34,35]', + 'dataset_size': 129433, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 18.03, + 'ave_edge_num': 18.63, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 16, + 'geometry': '3D, RI', + 'edge_attr_dim': 4, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/QM9.zip', + 'domain': 'small molecules', + }, + 'salicylic_acid': { + 'database': 'tudataset', + 'reference': '[36]', + 
'dataset_size': 220232, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 16.0, + 'ave_edge_num': 104.13, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 6, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/salicylic_acid.zip', + 'domain': 'small molecules', + }, + 'SF-295': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 40271, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.06, + 'ave_edge_num': 28.08, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/SF-295.zip', + 'domain': 'small molecules', + }, + 'SF-295H': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 40271, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 46.65, + 'ave_edge_num': 48.68, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/SF-295H.zip', + 'domain': 'small molecules', + }, + 'SN12C': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 40004, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.08, + 'ave_edge_num': 28.11, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/SN12C.zip', + 'domain': 'small molecules', + }, + 'SN12CH': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 40004, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 46.69, + 'ave_edge_num': 48.71, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/SN12CH.zip', + 'domain': 'small molecules', + }, + 
'SW-620': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 40532, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.05, + 'ave_edge_num': 28.08, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/SW-620.zip', + 'domain': 'small molecules', + }, + 'SW-620H': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 40532, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 46.62, + 'ave_edge_num': 48.65, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/SW-620H.zip', + 'domain': 'small molecules', + }, + 'toluene': { + 'database': 'tudataset', + 'reference': '[36]', + 'dataset_size': 342791, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 15.0, + 'ave_edge_num': 96.15, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 6, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/toluene.zip', + 'domain': 'small molecules', + }, + 'Tox21_AhR_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 8169, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 18.09, + 'ave_edge_num': 18.5, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_AhR_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_AhR_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 272, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 22.13, + 'ave_edge_num': 23.05, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 
'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_AhR_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_AhR_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 607, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.64, + 'ave_edge_num': 18.06, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_AhR_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_AR_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 9362, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 18.39, + 'ave_edge_num': 18.84, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_AR_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_AR_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 292, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 22.35, + 'ave_edge_num': 23.32, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_AR_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_AR_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 585, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.99, + 'ave_edge_num': 18.45, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_AR_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_AR-LBD_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 8599, + 'class_number': 2, + 'task_type': 'classification', + 
'ave_node_num': 17.77, + 'ave_edge_num': 18.16, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_AR-LBD_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_AR-LBD_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 253, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 21.85, + 'ave_edge_num': 22.73, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_AR-LBD_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_AR-LBD_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 580, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.09, + 'ave_edge_num': 17.42, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_AR-LBD_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_ARE_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 7167, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 16.28, + 'ave_edge_num': 16.52, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ARE_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_ARE_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 234, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 21.99, + 'ave_edge_num': 22.91, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ARE_testing.zip', + 'domain': 'small 
molecules', + }, + 'Tox21_ARE_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 552, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.01, + 'ave_edge_num': 17.33, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ARE_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_aromatase_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 7226, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.5, + 'ave_edge_num': 17.79, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_aromatase_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_aromatase_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 214, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 21.65, + 'ave_edge_num': 22.36, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_aromatase_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_aromatase_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 528, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 16.74, + 'ave_edge_num': 16.99, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_aromatase_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_ATAD5_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 9091, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.89, + 'ave_edge_num': 18.3, + 
'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ATAD5_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_ATAD5_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 272, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 21.99, + 'ave_edge_num': 22.89, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ATAD5_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_ATAD5_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 619, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.68, + 'ave_edge_num': 18.11, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ATAD5_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_ER_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 7697, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.58, + 'ave_edge_num': 17.94, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ER_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_ER_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 265, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 22.16, + 'ave_edge_num': 23.13, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ER_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_ER_evaluation': { + 'database': 
'tudataset', + 'reference': '[24]', + 'dataset_size': 515, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.66, + 'ave_edge_num': 18.1, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ER_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_ER-LBD_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 8753, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 18.06, + 'ave_edge_num': 18.47, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ER-LBD_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_ER-LBD_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 287, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 22.28, + 'ave_edge_num': 23.23, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ER-LBD_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_ER-LBD_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 599, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.75, + 'ave_edge_num': 18.17, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_ER-LBD_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_HSE_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 8150, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 16.72, + 'ave_edge_num': 17.04, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, 
+ 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_HSE_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_HSE_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 267, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 22.07, + 'ave_edge_num': 23.0, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_HSE_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_HSE_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 607, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.61, + 'ave_edge_num': 18.01, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_HSE_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_MMP_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 7320, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.49, + 'ave_edge_num': 17.83, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_MMP_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_MMP_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 238, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 21.68, + 'ave_edge_num': 22.55, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_MMP_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_MMP_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 541, + 'class_number': 2, + 'task_type': 
'classification', + 'ave_node_num': 16.67, + 'ave_edge_num': 16.88, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_MMP_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_p53_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 8634, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.79, + 'ave_edge_num': 18.19, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_p53_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_p53_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 269, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 22.14, + 'ave_edge_num': 23.04, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_p53_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_p53_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 613, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.34, + 'ave_edge_num': 17.72, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_p53_evaluation.zip', + 'domain': 'small molecules', + }, + 'Tox21_PPAR-gamma_training': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 8184, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.23, + 'ave_edge_num': 17.55, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 
'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_PPAR-gamma_training.zip', + 'domain': 'small molecules', + }, + 'Tox21_PPAR-gamma_testing': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 267, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 22.04, + 'ave_edge_num': 22.93, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_PPAR-gamma_testing.zip', + 'domain': 'small molecules', + }, + 'Tox21_PPAR-gamma_evaluation': { + 'database': 'tudataset', + 'reference': '[24]', + 'dataset_size': 602, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 17.38, + 'ave_edge_num': 17.77, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Tox21_PPAR-gamma_evaluation.zip', + 'domain': 'small molecules', + }, + 'UACC257': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 39988, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.09, + 'ave_edge_num': 28.12, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/UACC257.zip', + 'domain': 'small molecules', + }, + 'UACC257H': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 39988, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 46.68, + 'ave_edge_num': 48.71, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/UACC257H.zip', + 'domain': 'small molecules', + }, + 'uracil': { + 'database': 'tudataset', + 'reference': '[36]', + 'dataset_size': 133770, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 12.0, + 
'ave_edge_num': 64.44, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 6, + 'geometry': '3D, RI', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/uracil.zip', + 'domain': 'small molecules', + }, + 'Yeast': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 79601, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 21.54, + 'ave_edge_num': 22.84, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Yeast.zip', + 'domain': 'small molecules', + }, + 'YeastH': { + 'database': 'tudataset', + 'reference': '[28]', + 'dataset_size': 79601, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 39.44, + 'ave_edge_num': 40.74, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/YeastH.zip', + 'domain': 'small molecules', + }, + 'ZINC_full': { + 'database': 'tudataset', + 'reference': '[31]', + 'dataset_size': 249456, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 23.14, + 'ave_edge_num': 24.91, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/ZINC_full.zip', + 'domain': 'small molecules', + }, + 'ZINC_test': { + 'database': 'tudataset', + 'reference': '[31]', + 'dataset_size': 5000, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 23.1, + 'ave_edge_num': 24.83, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/ZINC_test.zip', + 'domain': 'small molecules', + }, + 'ZINC_train': { + 'database': 'tudataset', + 'reference': '[31]', + 'dataset_size': 220011, + 'class_number': 
None, + 'task_type': 'regression', + 'ave_node_num': 23.15, + 'ave_edge_num': 24.91, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/ZINC_train.zip', + 'domain': 'small molecules', + }, + 'ZINC_val': { + 'database': 'tudataset', + 'reference': '[31]', + 'dataset_size': 24445, + 'class_number': None, + 'task_type': 'regression', + 'ave_node_num': 23.13, + 'ave_edge_num': 24.88, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/ZINC_val.zip', + 'domain': 'small molecules', + }, + + ### bioinformatics + 'DD': { + 'database': 'tudataset', + 'reference': '[6,22]', + 'dataset_size': 1178, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 284.32, + 'ave_edge_num': 715.66, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/DD.zip', + 'domain': 'bioinformatics', + }, + 'ENZYMES': { + 'database': 'tudataset', + 'reference': '[4,5]', + 'dataset_size': 600, + 'class_number': 6, + 'task_type': 'classification', + 'ave_node_num': 32.63, + 'ave_edge_num': 62.14, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 18, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'http://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip', + 'domain': 'bioinformatics', + }, + 'KKI': { + 'database': 'tudataset', + 'reference': '[26]', + 'dataset_size': 83, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 26.96, + 'ave_edge_num': 48.42, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/KKI.zip', + 'domain': 'bioinformatics', + }, + 'OHSU': { + 'database': 'tudataset', + 
'reference': '[26]', + 'dataset_size': 79, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 82.01, + 'ave_edge_num': 199.66, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/OHSU.zip', + 'domain': 'bioinformatics', + }, + 'Peking_1': { + 'database': 'tudataset', + 'reference': '[26]', + 'dataset_size': 85, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 39.31, + 'ave_edge_num': 77.35, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Peking_1.zip', + 'domain': 'bioinformatics', + }, + 'PROTEINS': { + 'database': 'tudataset', + 'reference': '[4,6]', + 'dataset_size': 1113, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 39.06, + 'ave_edge_num': 72.82, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 1, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/PROTEINS.zip', + 'domain': 'bioinformatics', + }, + 'PROTEINS_full': { + 'database': 'tudataset', + 'reference': '[4,6]', + 'dataset_size': 1113, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 39.06, + 'ave_edge_num': 72.82, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 29, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/PROTEINS_full.zip', + 'domain': 'bioinformatics', + }, + + ### computer vision + 'COIL-DEL': { + 'database': 'tudataset', + 'reference': '[16,18]', + 'dataset_size': 3900, + 'class_number': 100, + 'task_type': 'classification', + 'ave_node_num': 21.54, + 'ave_edge_num': 54.24, + 'node_labeled': False, + 'edge_labeled': True, + 'node_attr_dim': 2, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 
'https://www.chrsmrrs.com/graphkerneldatasets/COIL-DEL.zip', + 'domain': 'computer vision', + }, + 'COIL-RAG': { + 'database': 'tudataset', + 'reference': '[16,18]', + 'dataset_size': 3900, + 'class_number': 100, + 'task_type': 'classification', + 'ave_node_num': 3.01, + 'ave_edge_num': 3.02, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 64, + 'geometry': None, + 'edge_attr_dim': 1, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/COIL-RAG.zip', + 'domain': 'computer vision', + }, + 'Cuneiform': { + 'database': 'tudataset', + 'reference': '[25]', + 'dataset_size': 267, + 'class_number': 30, + 'task_type': 'classification', + 'ave_node_num': 21.27, + 'ave_edge_num': 44.8, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 3, + 'geometry': '3D', + 'edge_attr_dim': 2, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Cuneiform.zip', + 'domain': 'computer vision', + }, + 'Fingerprint': { + 'database': 'tudataset', + 'reference': '[16,19]', + 'dataset_size': 2800, + 'class_number': 4, + 'task_type': 'classification', + 'ave_node_num': 5.42, + 'ave_edge_num': 4.42, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 2, + 'geometry': '2D', + 'edge_attr_dim': 2, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Fingerprint.zip', + 'domain': 'computer vision', + }, + 'FIRSTMM_DB': { + 'database': 'tudataset', + 'reference': '[11,12,13]', + 'dataset_size': 41, + 'class_number': 11, + 'task_type': 'classification', + 'ave_node_num': 1377.27, + 'ave_edge_num': 3074.1, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 1, + 'geometry': None, + 'edge_attr_dim': 2, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/FIRSTMM_DB.zip', + 'domain': 'computer vision', + }, + 'Letter-high': { + 'database': 'tudataset', + 'reference': '[16]', + 'dataset_size': 2250, + 'class_number': 15, + 'task_type': 'classification', + 'ave_node_num': 4.67, + 'ave_edge_num': 4.5, + 'node_labeled': False, + 
'edge_labeled': False, + 'node_attr_dim': 2, + 'geometry': '2D', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Letter-high.zip', + 'domain': 'computer vision', + }, + 'Letter-low': { + 'database': 'tudataset', + 'reference': '[16]', + 'dataset_size': 2250, + 'class_number': 15, + 'task_type': 'classification', + 'ave_node_num': 4.68, + 'ave_edge_num': 3.13, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 2, + 'geometry': '2D', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Letter-low.zip', + 'domain': 'computer vision', + }, + 'Letter-med': { + 'database': 'tudataset', + 'reference': '[16]', + 'dataset_size': 2250, + 'class_number': 15, + 'task_type': 'classification', + 'ave_node_num': 4.67, + 'ave_edge_num': 4.5, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 2, + 'geometry': '2D', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Letter-med.zip', + 'domain': 'computer vision', + }, + 'MSRC_9': { + 'database': 'tudataset', + 'reference': '[13]', + 'dataset_size': 221, + 'class_number': 8, + 'task_type': 'classification', + 'ave_node_num': 40.58, + 'ave_edge_num': 97.94, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/MSRC_9.zip', + 'domain': 'computer vision', + }, + 'MSRC_21': { + 'database': 'tudataset', + 'reference': '[13]', + 'dataset_size': 563, + 'class_number': 20, + 'task_type': 'classification', + 'ave_node_num': 77.52, + 'ave_edge_num': 198.32, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/MSRC_21.zip', + 'domain': 'computer vision', + }, + 'MSRC_21C': { + 'database': 'tudataset', + 'reference': '[13]', + 'dataset_size': 209, + 'class_number': 20, + 'task_type': 'classification', + 
'ave_node_num': 40.28, + 'ave_edge_num': 96.6, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/MSRC_21C.zip', + 'domain': 'computer vision', + }, + + ### social networks + 'COLLAB': { + 'database': 'tudataset', + 'reference': '[14]', + 'dataset_size': 5000, + 'class_number': 3, + 'task_type': 'classification', + 'ave_node_num': 74.49, + 'ave_edge_num': 2457.78, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/COLLAB.zip', + 'domain': 'social networks', + }, + 'dblp_ct1': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 755, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 52.87, + 'ave_edge_num': 320.09, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/dblp_ct1.zip', + 'domain': 'social networks', + }, + 'dblp_ct2': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 755, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 52.87, + 'ave_edge_num': 320.09, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/dblp_ct2.zip', + 'domain': 'social networks', + }, + 'DBLP_v1': { + 'database': 'tudataset', + 'reference': '[26]', + 'dataset_size': 19456, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 10.48, + 'ave_edge_num': 19.65, + 'node_labeled': True, + 'edge_labeled': True, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/DBLP_v1.zip', + 'domain': 'social networks', + }, + 'deezer_ego_nets': { + 'database': 
'tudataset', + 'reference': '[30]', + 'dataset_size': 9629, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 23.49, + 'ave_edge_num': 65.25, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/deezer_ego_nets.zip', + 'domain': 'social networks', + }, + 'facebook_ct1': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 995, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 95.72, + 'ave_edge_num': 269.01, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/facebook_ct1.zip', + 'domain': 'social networks', + }, + 'facebook_ct2': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 995, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 95.72, + 'ave_edge_num': 269.01, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/facebook_ct2.zip', + 'domain': 'social networks', + }, + 'github_stargazers': { + 'database': 'tudataset', + 'reference': '[30]', + 'dataset_size': 12725, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 113.79, + 'ave_edge_num': 234.64, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/github_stargazers.zip', + 'domain': 'social networks', + }, + 'highschool_ct1': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 180, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 52.32, + 'ave_edge_num': 544.81, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 
'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/highschool_ct1.zip', + 'domain': 'social networks', + }, + 'highschool_ct2': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 180, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 52.32, + 'ave_edge_num': 544.81, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/highschool_ct2.zip', + 'domain': 'social networks', + }, + 'IMDB-BINARY': { + 'database': 'tudataset', + 'reference': '[14]', + 'dataset_size': 1000, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 19.77, + 'ave_edge_num': 96.53, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': '', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/IMDB-BINARY.zip', + 'domain': 'social networks', + }, + 'IMDB-MULTI': { + 'database': 'tudataset', + 'reference': '[14]', + 'dataset_size': 1500, + 'class_number': 3, + 'task_type': 'classification', + 'ave_node_num': 13.0, + 'ave_edge_num': 65.94, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': '', + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/IMDB-MULTI.zip', + 'domain': 'social networks', + }, + 'infectious_ct1': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 200, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 50.0, + 'ave_edge_num': 459.72, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/infectious_ct1.zip', + 'domain': 'social networks', + }, + 'infectious_ct2': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 200, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 50.0, 
+ 'ave_edge_num': 459.72, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/infectious_ct2.zip', + 'domain': 'social networks', + }, + 'mit_ct1': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 97, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 20.0, + 'ave_edge_num': 1469.15, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/mit_ct1.zip', + 'domain': 'social networks', + }, + 'mit_ct2': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 97, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 20.0, + 'ave_edge_num': 1469.15, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/mit_ct2.zip', + 'domain': 'social networks', + }, + 'REDDIT-BINARY': { + 'database': 'tudataset', + 'reference': '[14]', + 'dataset_size': 2000, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 429.63, + 'ave_edge_num': 497.75, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/REDDIT-BINARY.zip', + 'domain': 'social networks', + }, + 'REDDIT-MULTI-5K': { + 'database': 'tudataset', + 'reference': '[14]', + 'dataset_size': 4999, + 'class_number': 5, + 'task_type': 'classification', + 'ave_node_num': 508.52, + 'ave_edge_num': 594.87, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/REDDIT-MULTI-5K.zip', + 'domain': 'social networks', + }, + 'REDDIT-MULTI-12K': { + 'database': 
'tudataset', + 'reference': '[14]', + 'dataset_size': 11929, + 'class_number': 11, + 'task_type': 'classification', + 'ave_node_num': 391.41, + 'ave_edge_num': 456.89, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/REDDIT-MULTI-12K.zip', + 'domain': 'social networks', + }, + 'reddit_threads': { + 'database': 'tudataset', + 'reference': '[30]', + 'dataset_size': 203088, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 23.93, + 'ave_edge_num': 24.99, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/reddit_threads.zip', + 'domain': 'social networks', + }, + 'tumblr_ct1': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 373, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 53.11, + 'ave_edge_num': 199.78, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/tumblr_ct1.zip', + 'domain': 'social networks', + }, + 'tumblr_ct2': { + 'database': 'tudataset', + 'reference': '[32]', + 'dataset_size': 373, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 53.11, + 'ave_edge_num': 199.78, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 'temporal', + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/tumblr_ct2.zip', + 'domain': 'social networks', + }, + 'twitch_egos': { + 'database': 'tudataset', + 'reference': '[30]', + 'dataset_size': 127094, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 29.67, + 'ave_edge_num': 86.59, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 
'https://www.chrsmrrs.com/graphkerneldatasets/twitch_egos.zip', + 'domain': 'social networks', + }, + 'TWITTER-Real-Graph-Partial': { + 'database': 'tudataset', + 'reference': '[26]', + 'dataset_size': 144033, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 4.03, + 'ave_edge_num': 4.98, + 'node_labeled': True, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 1, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/TWITTER-Real-Graph-Partial.zip', + 'domain': 'social networks', + }, + + ### synthetic + 'COLORS-3': { + 'database': 'tudataset', + 'reference': '[27]', + 'dataset_size': 10500, + 'class_number': 11, + 'task_type': 'classification', + 'ave_node_num': 61.31, + 'ave_edge_num': 91.03, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 4, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/COLORS-3.zip', + 'domain': 'synthetic', + }, + 'SYNTHETIC': { + 'database': 'tudataset', + 'reference': '[3]', + 'dataset_size': 300, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 100.0, + 'ave_edge_num': 196.0, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 1, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/SYNTHETIC.zip', + 'domain': 'synthetic', + }, + 'SYNTHETICnew': { + 'database': 'tudataset', + 'reference': '[3,10]', + 'dataset_size': 300, + 'class_number': 2, + 'task_type': 'classification', + 'ave_node_num': 100.0, + 'ave_edge_num': 196.25, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 1, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/SYNTHETICnew.zip', + 'domain': 'synthetic', + }, + 'Synthie': { + 'database': 'tudataset', + 'reference': '[21]', + 'dataset_size': 400, + 'class_number': 4, + 'task_type': 'classification', + 'ave_node_num': 95.0, + 'ave_edge_num': 172.93, + 
'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 15, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/Synthie.zip', + 'domain': 'synthetic', + }, + 'TRIANGLES': { + 'database': 'tudataset', + 'reference': '[27]', + 'dataset_size': 45000, + 'class_number': 10, + 'task_type': 'classification', + 'ave_node_num': 20.85, + 'ave_edge_num': 32.74, + 'node_labeled': False, + 'edge_labeled': False, + 'node_attr_dim': 0, + 'geometry': None, + 'edge_attr_dim': 0, + 'url': 'https://www.chrsmrrs.com/graphkerneldatasets/TRIANGLES.zip', + 'domain': 'synthetic', + }, +} + + +DATASET_META = {**GREYC_META, **IAM_META, **TUDataset_META} + + +def list_of_databases(): + """List names of all databases. + + Returns + ------- + list + The list of all databases. + """ + return [i for i in DATABASES] + + +def list_of_datasets(): + """List names of all datasets. + + Returns + ------- + list + The list of all datasets. + """ + return [i for i in DATASET_META] \ No newline at end of file