From f8d5668dc1a61e6826efaf5c489fef16a8b5dd46 Mon Sep 17 00:00:00 2001 From: Devesh Kumar Date: Wed, 11 Nov 2020 04:58:45 -0600 Subject: [PATCH] test cases added Former-commit-id: 46d43eb3b536d2cd90c4446983a292865df48d89 [formerly 1d4e5b5978e7c1b95f0f20f29b18eeabda87a45d] [formerly a7894f60c8f940c22fef24986add03a56348ca2b [formerly 86280ce9c93e2016fd603d4a8cb210ba4c979015]] [formerly 7686434aa517086c30ad820ac508c3b614c9332c [formerly e5c8bc392ff92c3f7a0a3bc804be20e5f893a701] [formerly 2076f136eaf58e966d3347af475d55b08cb9303b [formerly c5cfc9d1bb95ca904fbfdff6a275ec7e7b52a400]]] [formerly a3a7cc3fe77ba05d67f57888bb30d47ff9b75109 [formerly 6633595b581a5b59cd499c90315d88d50570fe4d] [formerly a772904a10f19417318e1ec53bcd48dd2abc8407 [formerly 5bfb3d73e13f8dcd87e0218b14b807270d2a95ad]] [formerly 86ac3f80fb15b330cb8b1c0a6b408bb7e3f5ced5 [formerly 35bc6c859c43b10c21670d33f191f70497efcfb9] [formerly dce21b3e9f8de071b0e8fed41d1ff25a245f91f1 [formerly f50fde7be7cbd6b13af482b9351116e84df25dc0]]]] [formerly f3e52476763c42536faa9d4209628ebc72209056 [formerly 84725abec94f03e8bb6a199b6c9567fead6e1275] [formerly 4c25a20f650ccf485a981081925cacf7f6b41af5 [formerly 35d1f43703e54cfa473082c7ad726c0062fbc23c]] [formerly 6964458a6cc98d347b4f0a41321e13360ecf351d [formerly a1b9b56b0b7ac13904cf44f7741fa73936b0a814] [formerly 0b36b8597073d008037d6d025ed11a7fd9e89506 [formerly 846ad9e311db3bec4b357a8519e913362d5e80ff]]] [formerly 10b203765ca6839efd057abfdde0c73722295834 [formerly 69ef8d335b550600490a96b9eb4e8de454eea1f9] [formerly e568fea250dcc2c49ebcf2118e63b1c2c0dc421c [formerly c8a7e5a298cba7b175f39d873a0a1803f7e2ca7b]] [formerly 55d575e702a1af0a6c0b67838d126f1ea222cc25 [formerly 5d41bad9ce13c37d5ec453bfb36ed3f9fd65c80a] [formerly 4f609302d658e6c9ae24a260de04488307ff09b6 [formerly 8e4b5ee9eb25c9f920037bbe9d9bc039c5cfb085]]]]] [formerly 0761ffdc456915520b7d221e099953e53f56819d [formerly c971130f03f821d2bf076896ef9bfa3714c1f5f0] [formerly 5165328709d38338aa6f8d0b90f1a808ca3ff313 [formerly b3ba5008d79c8af592187b5bdc42c274e2cc4ea1]] [formerly b4c9a49bb079c7fb7dbead72f1c1ccd18abba73a [formerly 802c0e4689319709b2fadac5c2884530edbbc042] [formerly e190c9f5273a988cb160e76a63babd44939d7ce7 [formerly f5f784b68e07271248b31d4fa5d03b79e747605d]]] [formerly ae73ed9c36fd18b2b71fbcb591262e03a58568ab [formerly a8ebcc43507e66f0704a91e22ae9ca11b0d4b924] [formerly 480f95ca28165761336b2f2cf1d0712904b84351 [formerly 8ba2b06eaca0992177ac097189127691bacae10e]] [formerly 7dfc73aa225cf4acedb8c60953117695e1a0f4f4 [formerly d93cbb45c8a7f7e6c0fbcf9228051fb02bef9318] [formerly c68dfe583bde44b6bd7b0f31e3e3a9c02ecee340 [formerly c1fc0f62b33aa8f6fb72042fb5108d86300aad54]]]] [formerly 82a666a3be9ceec587c75cb00254a43d41dc38b0 [formerly 72bcd362be8a26937230d92f49648aaf25523676] [formerly 33282bc59ce8bb106b72f99dfac944bd32dfb78f [formerly 51a8423b9bfd54fb1c7fb0a426fbeb45a13728af]] [formerly 637d5cf49e79eeb05cbd6561b9d732d8758904e9 [formerly 88aa198ea98cf89e3d7e124527b3e7cd0026f72d] [formerly 04cd6d589c1a49d94c53a69d7a8e6b5e6d07e760 [formerly 25b48a24bbcc79ee64c56b178bafc22714db76c2]]] [formerly b7bc4c19167c845a8ba65bad490b511e1d6b9b81 [formerly 33aba5599183845b3da9b2b58bb20af47ea3285c] [formerly 253bffa7153f3a8954eb5d45e7a37045e35ba334 [formerly 774a63133b588c3c49ea67c66b8a5e91a0da9c1a]] [formerly 61678b32fcf9c87f195970b6105fcb44f9d4d273 [formerly 54a92a464657b5c15678763e5a557010aaf3c1b7] [formerly cd7e572aee33b6471b32018b9ae66535274ad84c [formerly 12507d77c8d901ec5abee861e5ca5d520352965f]]]]]] Former-commit-id: be11f7e68b67a74728cc381b0389ea7577aa3e5d [formerly cf5d9f6bac993e37fd597ceb7d16f2536a8aedd3] [formerly 192df6ba14ff31a41490d76173fbfe004f93ff5d [formerly c78dd9a7709ce11eb202e519999233d2b8334f98]] [formerly e9599f93403a04978f77b7660af766bd3178bd2b [formerly 5fa597d99e34cb45a4aee9184f2348444b9df61a] [formerly 1c0a81ef4a13055f900334f7114ff5179ff0c00d [formerly fdb69b1b9955cfb0f12e0bb89fc3a8d71929a13b]]] [formerly 2aaa17cc052c0f39cace752f7be0b2d75a7efe60 [formerly 9d981eabc2dc731b8ef59f4265f3a371ffbf6dbf] [formerly 1cb91b70d6544fd1c1754f9a72efd57bb20044b8 [formerly a6b6898d4553a0715d70de82062fa779aa6ad631]] [formerly 46915532fa857c4d3ed623e8708dc5a90fc7e114 [formerly e63b6759a8d78dd99bc803d49f35bcedb68fc211] [formerly 507bf2788407bb65cd3d3eddc0cc40092ffa23fa [formerly 4787017cab723db7c3ad85b43820ff455cb09e7c]]]] [formerly 7e60834b56047231d4043229d803ee70a5be9ea4 [formerly 0fe096fe4644b4f7f191eec0e8f1f7f1f3fdc8ab] [formerly 8ece7e0be0f0544873b245255ef42ee7aa119da3 [formerly 126ce647c6249ec2fecc90cd9987666159801b6c]] [formerly 0f1dea464e6e27180c3bfc9476121422828b00a9 [formerly 4bea5590515cd4366ae1f42d434e084f8be65747] [formerly 9f4af75ee3b95b281881c75bfb27d229371fc8d2 [formerly e51c173ac4b646ac77e205f809712f45184fb574]]] [formerly 5290099d420ca836b4a4e336a1ab96b633cf0e51 [formerly 4f98c634c1f573320c77a3c0bf99d220568f9837] [formerly b5661390c0c91049b749f62effc8580fa272daeb [formerly 01f4dc5b817b4c53c2118611151d1f72e36d3d3b]] [formerly 1ffc2ce3b24e33e9037b055521bc844b2f71aea9 [formerly b6cf5f21e81f3e04bd957c8bceb7734ac2ad7cbb] [formerly cd7e572aee33b6471b32018b9ae66535274ad84c]]]] Former-commit-id: e00e93f654f27f3bddb770bc9d883a465d884bc2 [formerly f465f9315295ff5ec96f51fb4a107619738548e4] [formerly 96bf6cd1e947efa2767910529b8a8b833575b2b7 [formerly e3cb872e95693ad4a94a4d93fc098abc8a8fdf58]] [formerly 7bbf6de45a1ca6dddf1724cce121bb907e743d53 [formerly 6246bc436f34e86268a00e5f2a60708eaa742333] [formerly f7e6badd788feb2f42d39fce65147b3ecfb17de3 [formerly 4a56039409e8c5ad1d540a60b1a000e8b4edc2bc]]] [formerly b9951098fbd09e3d91c18fbda48ed9ba13fa60a4 [formerly 81d285898e5314cb61796a38d9f2f96e7267a14f] [formerly bb34629983bccd0834b47c98af143eec05c43022 [formerly 13ef5cc298282d3f753a69e4cb98af77ddb3b48f]] [formerly 9ab9f3457d5cd5099c14657aa1e978f2e8b5a9e4 [formerly 7ddec8785b891f8088a6fbcb0616ab41ca1c0415] [formerly 42efd4d2ec326412f8539c6872baf94a51432106 [formerly 8eb58743bb3cea056c0a997a1f4aa18c1c0626c9]]]] Former-commit-id: a1afe3cce57798369c4e94f72bfd8d58f4072c89 [formerly 91fab9355dde9f65730ed6faecb64cc47af0b1ce] [formerly dc19a95fa7d3a54ab918f2fd0e12d48c84894dbb [formerly b4e22f8abd1315f34043c993224015557700062d]] [formerly b2f2b2761006d4b2951f2e5d49df64edafe4a34b [formerly aa7160ec365233d0f36c65047a393a4acd03d60a] [formerly 67a7bef30286f01e906240e62e3ad6e5d29917bd [formerly c3f9e539d4175a328c69949792778af7496c56d5]]] Former-commit-id: 8a4a143a492fee6e646b601f3c7fcf5128d3260f [formerly 499606f224a80245cd8b7fc94cec62b1a9818a25] [formerly 98e3acbe03273b8eda8c9cd6e31ccdb4e2274409 [formerly 042fc1dde9eef5a2fcbbe690b5db540f6e0306ec]] Former-commit-id: 659c4020ebad0400327c940cd37a28779bef34e6 [formerly 925f9c92620f0988de2f7ed81a27c1fed22222fa] Former-commit-id: 9686f047ab41fc301ab38c1b032e418075580a2f --- tods/tests/data_processing/test_ColumnParser.py | 98 +++++++++++++++ .../data_processing/test_ConstructPredictions.py | 131 +++++++++++++++++++++ .../data_processing/test_DatasetToDataFrame.py | 80 +++++++++++++ .../test_ExtractColumnsBySemanticTypes.py | 107 +++++++++++++++++ tods/tests/data_processing/test_SKImputer.py | 92 +++++++++++++++ 5 files changed, 508 insertions(+) create mode 100644 tods/tests/data_processing/test_ColumnParser.py create mode 100644 tods/tests/data_processing/test_ConstructPredictions.py create mode 100644 tods/tests/data_processing/test_DatasetToDataFrame.py create mode 100644 tods/tests/data_processing/test_ExtractColumnsBySemanticTypes.py create mode 100644 tods/tests/data_processing/test_SKImputer.py diff --git a/tods/tests/data_processing/test_ColumnParser.py b/tods/tests/data_processing/test_ColumnParser.py new file mode 100644 index 0000000..1b75e6e --- /dev/null +++ b/tods/tests/data_processing/test_ColumnParser.py @@ -0,0 +1,98 @@ + +import os.path +import unittest + + + +from d3m import container, utils +from d3m.metadata import base as metadata_base + +from tods.data_processing import DatasetToDataframe, ColumnParser + +import utils as test_utils + + +class ColumnParserPrimitiveTestCase(unittest.TestCase): + def test_basic(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__),'..', '..', '..', 'datasets', 'anomaly','yahoo_sub_5','TRAIN','dataset_TRAIN', 'datasetDoc.json')) + + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + hyperparams_class = DatasetToDataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() + + primitive = DatasetToDataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) + + call_metadata = primitive.produce(inputs=dataset) + + dataframe = call_metadata.value + + hyperparams_class = ColumnParser.ColumnParserPrimitive.metadata.get_hyperparams() + + primitive = ColumnParser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) + + call_metadata = primitive.produce(inputs=dataframe) + + dataframe = call_metadata.value + + first_row = list(dataframe.itertuples(index=False, name=None))[0] + + self.assertEqual(first_row, (0, 1, 12183.0, 0.0, 3.7166666666667, 5.0, 2109.0, 0)) + + self.assertEqual([type(o) for o in first_row], [int,int, float,float, float, float, float, int]) + + self._test_basic_metadata(dataframe.metadata) + + def _test_basic_metadata(self, metadata): + self.maxDiff = None + + self.assertEqual(test_utils.convert_through_json(metadata.query(())), { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': 'd3m.container.pandas.DataFrame', + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Table', + ], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': 1260, + } + }) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': 8, + } + }) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { + 'name': 'd3mIndex', + 'structural_type': 'int', + 'semantic_types': [ + 'http://schema.org/Integer', + 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', + ], + }) + + + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), {'name': 'd3mIndex', 'structural_type': 'int', 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']}) + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1))), {'name': 'timestamp', 'structural_type': 'int', 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 2))), {'name': 'value_0', 'structural_type': 'float', 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 3))), {'name': 'value_1', 'structural_type': 'float', 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 4))), {'name': 'value_2', 'structural_type': 'float', 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 5))), {'name': 'value_3', 'structural_type': 'float', 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 6))), {'name': 'value_4', 'structural_type': 'float', 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 7))), {'name': 'ground_truth', 'structural_type': 'int', 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + + +if __name__ == '__main__': + unittest.main() diff --git a/tods/tests/data_processing/test_ConstructPredictions.py b/tods/tests/data_processing/test_ConstructPredictions.py new file mode 100644 index 0000000..0d5ea62 --- /dev/null +++ b/tods/tests/data_processing/test_ConstructPredictions.py @@ -0,0 +1,131 @@ +import copy +import os +import unittest + +import numpy + +from d3m import container +from d3m.metadata import base as metadata_base + +from tods.data_processing import DatasetToDataframe , ConstructPredictions , ExtractColumnsBySemanticTypes + +import utils as test_utils + + +class ConstructPredictionsPrimitiveTestCase(unittest.TestCase): + # TODO: Make this part of metadata API. + # Something like setting a semantic type for given columns. + def _mark_all_targets(self, dataset, targets): + for target in targets: + dataset.metadata = dataset.metadata.add_semantic_type((target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type((target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type((target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + def _get_yahoo_dataframe(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__),'..', '..', '..', 'datasets', 'anomaly','yahoo_sub_5','TRAIN','dataset_TRAIN', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + self._mark_all_targets(dataset, [{'resource_id': 'learningData', 'column_index': 5}]) + + hyperparams_class = DatasetToDataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() + + primitive = DatasetToDataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) + + call_metadata = primitive.produce(inputs=dataset) + + dataframe = call_metadata.value + + return dataframe + + def test_correct_order(self): + dataframe = self._get_yahoo_dataframe() + + hyperparams_class = ExtractColumnsBySemanticTypes.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams() + + # We extract both the primary index and targets. So it is in the output format already. + primitive = ExtractColumnsBySemanticTypes.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'https://metadata.datadrivendiscovery.org/types/Target',)})) + + call_metadata = primitive.produce(inputs=dataframe) + + targets = call_metadata.value + + # We pretend these are our predictions. + targets.metadata = targets.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + targets.metadata = targets.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget') + + # We switch columns around. + targets = targets.select_columns([1, 0]) + + hyperparams_class = ConstructPredictions.ConstructPredictionsPrimitive.metadata.get_hyperparams() + + construct_primitive = ConstructPredictions.ConstructPredictionsPrimitive(hyperparams=hyperparams_class.defaults()) + + call_metadata = construct_primitive.produce(inputs=targets, reference=dataframe) + + dataframe = call_metadata.value + + self.assertEqual(list(dataframe.columns), ['d3mIndex', 'value_3']) + + self._test_metadata(dataframe.metadata) + + + + def _test_metadata(self, metadata, no_metadata=False): + self.maxDiff = None + + self.assertEqual(test_utils.convert_through_json(metadata.query(())), { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': 'd3m.container.pandas.DataFrame', + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Table', + ], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': 1260, + } + }) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': 2, + } + }) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { + 'name': 'd3mIndex', + 'structural_type': 'str', + 'semantic_types': [ + 'http://schema.org/Integer', + 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', + ], + }) + + if no_metadata: + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1))), { + 'name': 'value_3', + 'structural_type': 'str', + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Target', + 'https://metadata.datadrivendiscovery.org/types/PredictedTarget', + ], + }) + + else: + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1))), { + 'name': 'value_3', + 'structural_type': 'str', + 'semantic_types': [ + 'http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/Target', + 'https://metadata.datadrivendiscovery.org/types/PredictedTarget', + ], + }) + + + +if __name__ == '__main__': + unittest.main() diff --git a/tods/tests/data_processing/test_DatasetToDataFrame.py b/tods/tests/data_processing/test_DatasetToDataFrame.py new file mode 100644 index 0000000..52bc45e --- /dev/null +++ b/tods/tests/data_processing/test_DatasetToDataFrame.py @@ -0,0 +1,80 @@ +import os.path +import unittest + + + +from d3m import container, utils +from d3m.metadata import base as metadata_base + +from tods.data_processing import DatasetToDataframe + +import utils as test_utils + + +class ColumnParserPrimitiveTestCase(unittest.TestCase): + def test_basic(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__),'..', '..', '..', 'datasets', 'anomaly','yahoo_sub_5','TRAIN','dataset_TRAIN', 'datasetDoc.json')) + + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + hyperparams_class = DatasetToDataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() + + primitive = DatasetToDataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) + + call_metadata = primitive.produce(inputs=dataset) + + dataframe = call_metadata.value + + + first_row = list(dataframe.itertuples(index=False, name=None))[0] + + self.assertEqual(first_row, ('0', '1', '12183', '0.0', '3.7166666666667', '5', '2109', '0')) + + self.assertEqual([type(o) for o in first_row], [str,str, str,str, str, str, str, str]) + + self._test_basic_metadata(dataframe.metadata) + + def _test_basic_metadata(self, metadata): + self.maxDiff = None + + self.assertEqual(test_utils.convert_through_json(metadata.query(())), { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': 'd3m.container.pandas.DataFrame', + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Table', + ], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': 1260, + } + }) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': 8, + } + }) + + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), {'name': 'd3mIndex', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']}) + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1))), {'name': 'timestamp', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 2))), {'name': 'value_0', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 3))), {'name': 'value_1', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 4))), {'name': 'value_2', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 5))), {'name': 'value_3', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 6))), {'name': 'value_4', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 7))), {'name': 'ground_truth', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + + +if __name__ == '__main__': + unittest.main() diff --git a/tods/tests/data_processing/test_ExtractColumnsBySemanticTypes.py b/tods/tests/data_processing/test_ExtractColumnsBySemanticTypes.py new file mode 100644 index 0000000..f0ecf57 --- /dev/null +++ b/tods/tests/data_processing/test_ExtractColumnsBySemanticTypes.py @@ -0,0 +1,107 @@ +import os.path +import unittest + + + +from d3m import container, utils +from d3m.metadata import base as metadata_base + +from tods.data_processing import DatasetToDataframe, ExtractColumnsBySemanticTypes + +import utils as test_utils + + +class ExtractColumnsBySemanticTypePrimitiveTestCase(unittest.TestCase): + def test_basic(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__),'..', '..', '..', 'datasets', 'anomaly','yahoo_sub_5','TRAIN','dataset_TRAIN', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 7), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = DatasetToDataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() + + primitive = DatasetToDataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) + + call_metadata = primitive.produce(inputs=dataset) + + dataframe = call_metadata.value + + hyperparams_class = ExtractColumnsBySemanticTypes.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams() + + primitive = ExtractColumnsBySemanticTypes.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey')})) + + call_metadata = primitive.produce(inputs=dataframe) + + dataframe = call_metadata.value + + self._test_metadata(dataframe.metadata) + + def _test_metadata(self, metadata): + self.maxDiff = None + + self.assertEqual(test_utils.convert_through_json(metadata.query(())), { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': 'd3m.container.pandas.DataFrame', + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/Table', + ], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': 1260, + } + }) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': 7, + } + }) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { + 'name': 'd3mIndex', + 'structural_type': 'str', + 'semantic_types': [ + 'http://schema.org/Integer', + 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', + ], + }) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), + {'name': 'd3mIndex', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Integer', + 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']}) + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1))), + {'name': 'timestamp', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Integer', + 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 2))), + {'name': 'value_0', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 3))), + {'name': 'value_1', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 4))), + {'name': 'value_2', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 5))), + {'name': 'value_3', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 6))), + {'name': 'value_4', 'structural_type': 'str', 'semantic_types': ['http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/Attribute']}) + + + + + +if __name__ == '__main__': + unittest.main() diff --git a/tods/tests/data_processing/test_SKImputer.py b/tods/tests/data_processing/test_SKImputer.py new file mode 100644 index 0000000..6fdef4f --- /dev/null +++ b/tods/tests/data_processing/test_SKImputer.py @@ -0,0 +1,92 @@ +import unittest +import numpy +from d3m import container, utils +from d3m.metadata import base as metadata_base + +from tods.data_processing import SKImputer + + +class SkImputerTestCase(unittest.TestCase): + def test_basic(self): + + main = container.DataFrame({'timestamp': [1,2,3,5], 'a': [numpy.nan,2.0,3.0,4.0],'b':[1.0,4.0,5.0,6.0]},columns=['timestamp', 'a', 'b'], + generate_metadata=True) + + + + + self.assertEqual(utils.to_json_structure(main.metadata.to_internal_simple_structure()), [{ + 'selector': [], + 'metadata': { + # 'top_level': 'main', + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': 'd3m.container.pandas.DataFrame', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': 4, + }, + }, + }, { + 'selector': ['__ALL_ELEMENTS__'], + 'metadata': { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': 3, + }, + }, + }, { + 'selector': ['__ALL_ELEMENTS__', 0], + 'metadata': {'structural_type': 'numpy.int64', 'name': 'timestamp'}, + }, { + 'selector': ['__ALL_ELEMENTS__', 1], + 'metadata': {'structural_type': 'numpy.float64', 'name': 'a'}, + }, { + 'selector': ['__ALL_ELEMENTS__', 2], + 'metadata': {'structural_type': 'numpy.float64', 'name': 'b'}, + }]) + + hyperparams_class = SKImputer.SKImputerPrimitive.metadata.get_hyperparams() + + primitive = SKImputer.SKImputerPrimitive(hyperparams=hyperparams_class.defaults()) + primitive.set_training_data(inputs=main) + primitive.fit() + output_main = primitive.produce(inputs=main).value + print(output_main[['timestamp','a','b']].values.tolist()) + expected_output = container.DataFrame({'timestamp': [1,2,3,5], 'a': [3.0,2.0,3.0,4.0],'b': [1.0,4.0,5.0,6.0]}) + + self.assertEqual(output_main[['timestamp','a','b']].values.tolist() , expected_output[['timestamp','a','b']].values.tolist()) + + self.assertEqual(utils.to_json_structure(output_main.metadata.to_internal_simple_structure()), [{'metadata': {'dimension': {'length': 4, + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow']}, + 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], + 'structural_type': 'd3m.container.pandas.DataFrame'}, + 'selector': []}, + {'metadata': {'dimension': {'length': 3, + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn']}}, + 'selector': ['__ALL_ELEMENTS__']}, + {'metadata': {'name': 'timestamp', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], + 'structural_type': 'numpy.float64'}, + 'selector': ['__ALL_ELEMENTS__', 0]}, + {'metadata': {'name': 'a', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], + 'structural_type': 'numpy.float64'}, + 'selector': ['__ALL_ELEMENTS__', 1]}, + {'metadata': {'name': 'b', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], + 'structural_type': 'numpy.float64'}, + 'selector': ['__ALL_ELEMENTS__', 2]}]) + + params = primitive.get_params() + primitive.set_params(params=params) + + +if __name__ == '__main__': + unittest.main() +