@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+import sys
+import unittest
+
+runner = unittest.TextTestRunner(verbosity=1)
+tests = unittest.TestLoader().discover('./')
+if not runner.run(tests).wasSuccessful():
+    sys.exit(1)
+
+#for each in ['data_processing', 'timeseries_processing', 'feature_analysis', 'detection_algorithm']:
+#    tests = unittest.TestLoader().discover(each)
+#    if not runner.run(tests).wasSuccessful():
+#        sys.exit(1)
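For reference, here is the commented-out loop from the runner above in runnable form. It scopes unittest discovery to one sub-package at a time, which can help localize failures; a sketch assuming each listed directory (the names come from the comment itself) contains test modules matching the default test*.py pattern, not part of the diff:

    #!/usr/bin/env python3
    import sys
    import unittest

    runner = unittest.TextTestRunner(verbosity=1)
    # Directory names taken from the commented-out loop above.
    for each in ['data_processing', 'timeseries_processing', 'feature_analysis', 'detection_algorithm']:
        tests = unittest.TestLoader().discover(each)
        if not runner.run(tests).wasSuccessful():
            sys.exit(1)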
@@ -9,14 +9,20 @@ from tods.common import FixedSplit

 class FixedSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):
-    def test_produce_train_values(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
+    def _get_yahoo_dataset(self):
+        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'datasets', 'anomaly', 'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json'))
         dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        return dataset
+
+    def test_produce_train_values(self):
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
         dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

         hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()
@@ -44,20 +50,18 @@ class FixedSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):

         for dataset in results:
             self.assertEqual(len(dataset), 1)

-        self.assertEqual(results[0]['learningData'].shape[0], 147)
-        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i not in [9, 11, 13]])
+        self.assertEqual(results[0]['learningData'].shape[0], 1257)
+        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(1260) if i not in [9, 11, 13]])

     def test_produce_score_values(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
-        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
         dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = fixed_split.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()

         hyperparams = hyperparams_class.defaults().replace({
             'primary_index_values': ['9', '11', '13'],
@@ -67,7 +71,7 @@ class FixedSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):

         # a pickle because runtime populates this primitive as a list from a split file.
         self.assertEqual(hyperparams.values_to_json_structure(), {'primary_index_values': ['9', '11', '13'], 'row_indices': [], 'delete_recursive': False})

-        primitive = fixed_split.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams)
+        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams)

         primitive.set_training_data(dataset=dataset)
         primitive.fit()
@@ -83,18 +87,15 @@ class FixedSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):

         self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i in [9, 11, 13]])

     def test_produce_train_indices(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
-        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
         dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = fixed_split.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()

-        primitive = fixed_split.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'row_indices': [9, 11, 13],
         }))
@@ -111,22 +112,20 @@ class FixedSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):

         for dataset in results:
             self.assertEqual(len(dataset), 1)

-        self.assertEqual(results[0]['learningData'].shape[0], 147)
-        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i not in [9, 11, 13]])
+        self.assertEqual(results[0]['learningData'].shape[0], 1257)
+        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(1260) if i not in [9, 11, 13]])

     def test_produce_score_indices(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
-        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
         dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = fixed_split.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams()

-        primitive = fixed_split.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'row_indices': [9, 11, 13],
         }))
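The add/remove semantic-type boilerplate above recurs in every test file in this diff. A hypothetical helper that consolidates it (the column indices 0, 7, and 5 and the type URIs are taken from the tests above; the name _add_yahoo_semantic_types is illustrative, not part of the diff):

    from d3m.metadata import base as metadata_base

    def _add_yahoo_semantic_types(dataset):
        # Tag column 0 as the index and column 7 as the true target, and
        # drop the Attribute marker from column 5, as the tests above do.
        dataset.metadata = dataset.metadata.add_semantic_type(
            ('learningData', metadata_base.ALL_ELEMENTS, 0),
            'https://metadata.datadrivendiscovery.org/types/Index')
        dataset.metadata = dataset.metadata.add_semantic_type(
            ('learningData', metadata_base.ALL_ELEMENTS, 7),
            'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(
            ('learningData', metadata_base.ALL_ELEMENTS, 5),
            'https://metadata.datadrivendiscovery.org/types/Attribute')
        return dataset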
@@ -5,26 +5,31 @@ import unittest

 from d3m import container
 from d3m.metadata import base as metadata_base
-from common_primitives import kfold_split
+from tods.common import KFoldSplit


 class KFoldDatasetSplitPrimitiveTestCase(unittest.TestCase):
-    def test_produce_train(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json'))
+    def _get_yahoo_dataset(self):
+        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'datasets', 'anomaly', 'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json'))
         dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        return dataset
+
+    def test_produce_train(self):
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
-        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = kfold_split.KFoldDatasetSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = KFoldSplit.KFoldDatasetSplitPrimitive.metadata.get_hyperparams()

-        primitive = kfold_split.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = KFoldSplit.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'number_of_folds': 10,
             'shuffle': True,
-            'delete_recursive': True,
+            'delete_recursive': False,
         }))

         primitive.set_training_data(dataset=dataset)
@@ -33,68 +38,45 @@ class KFoldDatasetSplitPrimitiveTestCase(unittest.TestCase):

         # To test that pickling works.
         pickle.dumps(primitive)

-        results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value
+        results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value

-        self.assertEqual(len(results), 2)
+        self.assertEqual(len(results), 1)

         for dataset in results:
-            self.assertEqual(len(dataset), 4)
+            self.assertEqual(len(dataset), 1)

-        self.assertEqual(results[0]['codes'].shape[0], 3)
-        self.assertEqual(results[1]['codes'].shape[0], 3)
-        self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 40)
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'})
-        self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 40)
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'})
+        self.assertEqual(results[0]['learningData'].shape[0], 1134)

     def test_produce_score(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json'))
-        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
-        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = kfold_split.KFoldDatasetSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = KFoldSplit.KFoldDatasetSplitPrimitive.metadata.get_hyperparams()

-        primitive = kfold_split.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = KFoldSplit.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'number_of_folds': 10,
             'shuffle': True,
-            'delete_recursive': True,
+            'delete_recursive': False,
         }))

         primitive.set_training_data(dataset=dataset)
         primitive.fit()

-        results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value
+        results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value

-        self.assertEqual(len(results), 2)
+        self.assertEqual(len(results), 1)

         for dataset in results:
-            self.assertEqual(len(dataset), 4)
+            self.assertEqual(len(dataset), 1)

-        self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 0]), {'5', '11', '28', '31', '38'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ddd', 'eee'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000'})
-        self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'BBB', 'CCC'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 0]), {'12', '26', '29', '32', '39'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'BBB', 'CCC'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'bbb', 'ccc', 'ddd', 'eee'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'})
+        self.assertEqual(results[0]['learningData'].shape[0], 126)


 if __name__ == '__main__':
     unittest.main()
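The new expected row counts follow from simple fold arithmetic: yahoo_sub_5 has 1260 rows (see the NoSplit assertions below), so with number_of_folds=10 a training split keeps 9/10 of the rows and a scoring split holds out the remaining 1/10. A quick check of the values asserted above:

    n_rows, n_folds = 1260, 10
    assert n_rows - n_rows // n_folds == 1134  # train fold size asserted above
    assert n_rows // n_folds == 126            # score fold size asserted above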
@@ -5,24 +5,31 @@ import unittest

 from d3m import container
 from d3m.metadata import base as metadata_base
-from common_primitives import kfold_split_timeseries
+from tods.common import KFoldSplitTimeseries


 class KFoldTimeSeriesSplitPrimitiveTestCase(unittest.TestCase):
-    def test_produce_train_timeseries_1(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json'))
+    def _get_yahoo_dataset(self):
+        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'datasets', 'anomaly', 'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json'))
         dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        return dataset
+
+    def test_produce_train_timeseries_1(self):
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
-        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/Time')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = KFoldSplitTimeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()

         folds = 5
-        primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = KFoldSplitTimeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'number_of_folds': folds,
             'number_of_window_folds': 1,
         }))
@@ -33,35 +40,29 @@ class KFoldTimeSeriesSplitPrimitiveTestCase(unittest.TestCase):

         # To test that pickling works.
         pickle.dumps(primitive)

-        results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value
+        results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value

-        self.assertEqual(len(results), 2)
+        self.assertEqual(len(results), 1)

         for dataset in results:
             self.assertEqual(len(dataset), 1)

-        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 8)
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-05', '2013-11-06', '2013-11-07', '2013-11-08', '2013-11-11', '2013-11-12', '2013-11-13', '2013-11-14'})
-        self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 8)
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-13', '2013-11-14', '2013-11-15', '2013-11-18', '2013-11-19', '2013-11-20', '2013-11-21', '2013-11-22'})
+        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 210)
+        #TODO: correct the semantic type and validate unix timestamp

     def test_produce_score_timeseries_1(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json'))
-        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
-        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/Time')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = KFoldSplitTimeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()

         folds = 5
-        primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = KFoldSplitTimeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'number_of_folds': folds,
             'number_of_window_folds': 1,
         }))
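The 210 asserted above is consistent with scikit-learn-style TimeSeriesSplit sizing, where each test window holds roughly n_samples // (n_splits + 1) rows; assuming the primitive delegates to that scheme (an assumption, not stated in the diff):

    n_rows, folds = 1260, 5
    assert n_rows // (folds + 1) == 210  # window size matching the assertions above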
@@ -69,38 +70,31 @@ class KFoldTimeSeriesSplitPrimitiveTestCase(unittest.TestCase):

         primitive.set_training_data(dataset=dataset)
         primitive.fit()

-        results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value
+        results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value

-        self.assertEqual(len(results), 2)
+        self.assertEqual(len(results), 1)

         for dataset in results:
             self.assertEqual(len(dataset), 1)

-        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 6)
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-15', '2013-11-18', '2013-11-19', '2013-11-20', '2013-11-21', '2013-11-22'})
-        self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 6)
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-25', '2013-11-26', '2013-11-27', '2013-11-29', '2013-12-02', '2013-12-03'})
+        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 210)

     def test_produce_train(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json'))
-        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
-        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/Time')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

         # We fake that the dataset is time-series.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Time')
+        #dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/Time')

-        hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = KFoldSplitTimeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()

         folds = 5
-        primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = KFoldSplitTimeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'number_of_folds': folds,
             'number_of_window_folds': 1,
         }))
@@ -111,45 +105,30 @@ class KFoldTimeSeriesSplitPrimitiveTestCase(unittest.TestCase):

         # To test that pickling works.
         pickle.dumps(primitive)

-        results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value
+        results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value

-        self.assertEqual(len(results), 2)
+        self.assertEqual(len(results), 1)

         for dataset in results:
-            self.assertEqual(len(dataset), 4)
+            self.assertEqual(len(dataset), 1)

-        self.assertEqual(results[0]['codes'].shape[0], 3)
-        self.assertEqual(results[1]['codes'].shape[0], 3)
-        self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 9)
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'bbb', 'ccc', 'ddd'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990'})
-        self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 9)
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ddd', 'eee'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000'})
+        self.assertEqual(results[0]['learningData'].shape[0], 210)
+        #TODO: correct the semantic type and validate unix timestamp

     def test_produce_score(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json'))
-        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
-        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/Time')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        # We fake that the dataset is time-series.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Time')

-        hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = KFoldSplitTimeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()

         folds = 5
-        primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = KFoldSplitTimeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'number_of_folds': folds,
             'number_of_window_folds': 1,
         }))
@@ -162,37 +141,24 @@ class KFoldTimeSeriesSplitPrimitiveTestCase(unittest.TestCase):

         self.assertEqual(len(results), 2)

         for dataset in results:
-            self.assertEqual(len(dataset), 4)
+            self.assertEqual(len(dataset), 1)

-        self.assertEqual(results[0]['codes'].shape[0], 3)
-        self.assertEqual(results[1]['codes'].shape[0], 3)
-        self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 0]), {'2', '3', '32', '33', '37', '38', '39'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'ddd', 'eee'})
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000'})
-        self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 0]), {'22', '23', '24', '31', '40', '41', '42'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'ccc', 'ddd', 'eee'})
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2000'})
+        self.assertEqual(results[0]['learningData'].shape[0], 210)

     def test_unsorted_datetimes_timeseries_4(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_4', 'datasetDoc.json'))
-        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
-        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/Time')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = KFoldSplitTimeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams()

         folds = 5
-        primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = KFoldSplitTimeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'number_of_folds': folds,
             'number_of_window_folds': 1,
         }))
@@ -203,20 +169,16 @@ class KFoldTimeSeriesSplitPrimitiveTestCase(unittest.TestCase):

         # To test that pickling works.
         pickle.dumps(primitive)

-        results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value
+        results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value

-        self.assertEqual(len(results), 2)
+        self.assertEqual(len(results), 1)

         for dataset in results:
             self.assertEqual(len(dataset), 1)

-        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 8)
-        self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-05', '2013-11-06', '2013-11-07', '2013-11-08', '2013-11-11', '2013-11-12', '2013-11-13', '2013-11-14'})
-        self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 8)
-        self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-13', '2013-11-14', '2013-11-15', '2013-11-18', '2013-11-19', '2013-11-20', '2013-11-21', '2013-11-22'})
+        self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 210)
+        #TODO: correct the semantic type and validate unix timestamp


 if __name__ == '__main__':
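A sketch of what the recurring TODO above ("validate unix timestamp") might look like once implemented, assuming column 1 of yahoo_sub_5 holds integer Unix timestamps (the column the Time semantic type is set on above); the helper name is hypothetical:

    def assert_sorted_unix_timestamps(dataset):
        # Timestamps in a time-series split should be non-decreasing.
        timestamps = [int(t) for t in dataset['learningData'].iloc[:, 1]]
        assert timestamps == sorted(timestamps)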
@@ -5,23 +5,29 @@ import unittest

 from d3m import container
 from d3m.metadata import base as metadata_base
-from common_primitives import no_split
+from tods.common import NoSplit


 class NoSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):
-    def test_produce_train(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
+    def _get_yahoo_dataset(self):
+        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'datasets', 'anomaly', 'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json'))
         dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        return dataset
+
+    def test_produce_train(self):
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
         dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams()
-        primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults())
+        hyperparams_class = NoSplit.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams()
+        primitive = NoSplit.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults())

         primitive.set_training_data(dataset=dataset)
         primitive.fit()
@@ -36,22 +42,20 @@ class NoSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):

         for dataset in results:
             self.assertEqual(len(dataset), 1)

-        self.assertEqual(results[0]['learningData'].shape[0], 150)
-        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150)])
+        self.assertEqual(results[0]['learningData'].shape[0], 1260)
+        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(1260)])

     def test_produce_score(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
-        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
         dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams()
-        primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults())
+        hyperparams_class = NoSplit.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams()
+        primitive = NoSplit.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults())

         primitive.set_training_data(dataset=dataset)
         primitive.fit()
@@ -63,8 +67,8 @@ class NoSplitDatasetSplitPrimitiveTestCase(unittest.TestCase):

         for dataset in results:
             self.assertEqual(len(dataset), 1)

-        self.assertEqual(results[0]['learningData'].shape[0], 150)
-        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150)])
+        self.assertEqual(results[0]['learningData'].shape[0], 1260)
+        self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(1260)])


 if __name__ == '__main__':
@@ -4,18 +4,18 @@ import unittest

 from d3m import container, utils
 from d3m.metadata import base as metadata_base
-from common_primitives import redact_columns
+from tods.common import RedactColumns


 class RedactColumnsPrimitiveTestCase(unittest.TestCase):
     def _get_datasets(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
+        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'datasets', 'anomaly', 'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json'))
         dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
         dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')

         datasets = container.List([dataset], {
@@ -37,9 +37,9 @@ class RedactColumnsPrimitiveTestCase(unittest.TestCase):

     def test_basic(self):
        dataset_doc_path, datasets = self._get_datasets()

-        hyperparams_class = redact_columns.RedactColumnsPrimitive.metadata.get_hyperparams()
+        hyperparams_class = RedactColumns.RedactColumnsPrimitive.metadata.get_hyperparams()
-        primitive = redact_columns.RedactColumnsPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = RedactColumns.RedactColumnsPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'semantic_types': ('https://metadata.datadrivendiscovery.org/types/TrueTarget',),
             'add_semantic_types': ('https://metadata.datadrivendiscovery.org/types/RedactedTarget', 'https://metadata.datadrivendiscovery.org/types/MissingData'),
         }))
@@ -50,15 +50,15 @@ class RedactColumnsPrimitiveTestCase(unittest.TestCase):

         redacted_dataset = redacted_datasets[0]

         self.assertIsInstance(redacted_dataset, container.Dataset)

-        self.assertEqual(redacted_dataset['learningData']['species'].values.tolist(), [''] * 150)
-        self._test_metadata(redacted_datasets.metadata, dataset_doc_path, True)
-        self._test_metadata(redacted_dataset.metadata, dataset_doc_path, False)
+        # TODO: check metadata of yahoo dataset
+        #self._test_metadata(redacted_datasets.metadata, dataset_doc_path, True)
+        #self._test_metadata(redacted_dataset.metadata, dataset_doc_path, False)

     def _test_metadata(self, metadata, dataset_doc_path, is_list):
         top_metadata = {
             'structural_type': 'd3m.container.dataset.Dataset',
-            'id': 'iris_dataset_1',
+            'id': 'yahoo_sub_5_dataset_TRAIN',
             'version': '4.0.0',
             'name': 'Iris Dataset',
             'location_uris': [
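For context on the assertion removed above: the primitive blanks every value in columns carrying the listed semantic types, so the yahoo analogue of the old iris check would verify that the 1260-row ground_truth column is all empty strings after redaction. A hypothetical re-enabled check (the column name comes from the TrainScoreSplit test below; the helper name is illustrative):

    def check_redacted(redacted_dataset):
        # After redaction the true-target column should contain only ''.
        values = redacted_dataset['learningData']['ground_truth'].values.tolist()
        assert values == [''] * 1260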
@@ -5,23 +5,33 @@ import unittest

 from d3m import container
 from d3m.metadata import base as metadata_base
-from common_primitives import train_score_split
+from tods.common import TrainScoreSplit


 class TrainScoreDatasetSplitPrimitiveTestCase(unittest.TestCase):
-    def test_produce_train(self):
-        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json'))
+    def _get_yahoo_dataset(self):
+        dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'datasets', 'anomaly', 'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json'))
         dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
+        return dataset
+
+    def test_produce_train(self):
+        dataset = self._get_yahoo_dataset()

         # We set semantic types like runtime would.
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target')
-        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
+        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 2), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')
         dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 6), 'https://metadata.datadrivendiscovery.org/types/Attribute')
+        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/Attribute')

-        hyperparams_class = train_score_split.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams()
+        hyperparams_class = TrainScoreSplit.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams()

-        primitive = train_score_split.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
+        primitive = TrainScoreSplit.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
             'shuffle': True,
         }))
@@ -38,51 +48,42 @@ class TrainScoreDatasetSplitPrimitiveTestCase(unittest.TestCase):
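        # The hunk elided above presumably drives the primitive through the
        # standard d3m split-primitive protocol; a hypothetical minimal sketch
        # (not the elided code itself, assuming the common_primitives
        # fold-index convention for `inputs`):
        #
        #   primitive.set_training_data(dataset=dataset)
        #   primitive.fit()
        #   results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value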
        for dataset in results:
            self.assertEqual(len(dataset), 1)
        self.assertEqual(results[0]['learningData'].shape[0], 945)

        column_names = ['d3mIndex', 'timestamp', 'value_0', 'value_1', 'value_2', 'value_3', 'value_4', 'ground_truth']
        for i in range(8):
            self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['name'], column_names[i])
        self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 0))['semantic_types'], (
            'http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'https://metadata.datadrivendiscovery.org/types/Index',
        ))
        for i in range(2, 6):
            self.assertEqual(
                results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['semantic_types'], ('http://schema.org/Float',)
            )
        self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 7))['semantic_types'], (
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/SuggestedTarget',
            'https://metadata.datadrivendiscovery.org/types/TrueTarget',
        ))
    def test_produce_score(self):
        dataset = self._get_yahoo_dataset()
        # We set semantic types like runtime would.
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Index')
        dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 2), 'https://metadata.datadrivendiscovery.org/types/Attribute')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Attribute')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 6), 'https://metadata.datadrivendiscovery.org/types/Attribute')
        dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/Attribute')
        hyperparams_class = TrainScoreSplit.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams()
        primitive = TrainScoreSplit.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({
            'shuffle': True,
        }))
@@ -96,33 +97,28 @@ class TrainScoreDatasetSplitPrimitiveTestCase(unittest.TestCase):
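        # As above, the hunk elided here presumably obtains the score split;
        # under the same assumed protocol that would be roughly
        # primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value.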
        for dataset in results:
            self.assertEqual(len(dataset), 1)
        self.assertEqual(results[0]['learningData'].shape[0], 315)
        # TODO: check data type
        self.assertEqual(results.metadata.query((0, 'learningData'))['dimension']['length'], 315)
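        # 315 rows is the complementary 25% of the 1260-row dataset (945 + 315 = 1260).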
        column_names = ['d3mIndex', 'timestamp', 'value_0', 'value_1', 'value_2', 'value_3', 'value_4', 'ground_truth']
        for i in range(8):
            self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['name'], column_names[i])
        self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 0))['semantic_types'], (
            'http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'https://metadata.datadrivendiscovery.org/types/Index',
        ))
        for i in range(2, 6):
            self.assertEqual(
                results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['semantic_types'], ('http://schema.org/Float',)
            )
        self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 7))['semantic_types'], (
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/SuggestedTarget',
            'https://metadata.datadrivendiscovery.org/types/TrueTarget',
        ))