Former-commit-id:master46d43eb3b5
[formerly1d4e5b5978
] [formerlya7894f60c8
[formerly86280ce9c9
]] [formerly7686434aa5
[formerlye5c8bc392f
] [formerly2076f136ea
[formerlyc5cfc9d1bb
]]] [formerlya3a7cc3fe7
[formerly6633595b58
] [formerlya772904a10
[formerly5bfb3d73e1
]] [formerly86ac3f80fb
[formerly35bc6c859c
] [formerlydce21b3e9f
[formerlyf50fde7be7
]]]] [formerlyf3e5247676
[formerly84725abec9
] [formerly4c25a20f65
[formerly35d1f43703
]] [formerly6964458a6c
[formerlya1b9b56b0b
] [formerly0b36b85970
[formerly846ad9e311
]]] [formerly10b203765c
[formerly69ef8d335b
] [formerlye568fea250
[formerlyc8a7e5a298
]] [formerly55d575e702
[formerly5d41bad9ce
] [formerly4f609302d6
[formerly8e4b5ee9eb
]]]]] [formerly0761ffdc45
[formerlyc971130f03
] [formerly5165328709
[formerlyb3ba5008d7
]] [formerlyb4c9a49bb0
[formerly802c0e4689
] [formerlye190c9f527
[formerlyf5f784b68e
]]] [formerlyae73ed9c36
[formerlya8ebcc4350
] [formerly480f95ca28
[formerly8ba2b06eac
]] [formerly7dfc73aa22
[formerlyd93cbb45c8
] [formerlyc68dfe583b
[formerlyc1fc0f62b3
]]]] [formerly82a666a3be
[formerly72bcd362be
] [formerly33282bc59c
[formerly51a8423b9b
]] [formerly637d5cf49e
[formerly88aa198ea9
] [formerly04cd6d589c
[formerly25b48a24bb
]]] [formerlyb7bc4c1916
[formerly33aba55991
] [formerly253bffa715
[formerly774a63133b
]] [formerly61678b32fc
[formerly54a92a4646
] [formerlycd7e572aee
[formerly12507d77c8
]]]]]] Former-commit-id:be11f7e68b
[formerlycf5d9f6bac
] [formerly192df6ba14
[formerlyc78dd9a770
]] [formerlye9599f9340
[formerly5fa597d99e
] [formerly1c0a81ef4a
[formerlyfdb69b1b99
]]] [formerly2aaa17cc05
[formerly9d981eabc2
] [formerly1cb91b70d6
[formerlya6b6898d45
]] [formerly46915532fa
[formerlye63b6759a8
] [formerly507bf27884
[formerly4787017cab
]]]] [formerly7e60834b56
[formerly0fe096fe46
] [formerly8ece7e0be0
[formerly126ce647c6
]] [formerly0f1dea464e
[formerly4bea559051
] [formerly9f4af75ee3
[formerlye51c173ac4
]]] [formerly5290099d42
[formerly4f98c634c1
] [formerlyb5661390c0
[formerly01f4dc5b81
]] [formerly1ffc2ce3b2
[formerlyb6cf5f21e8
] [formerlycd7e572aee
]]]] Former-commit-id:e00e93f654
[formerlyf465f93152
] [formerly96bf6cd1e9
[formerlye3cb872e95
]] [formerly7bbf6de45a
[formerly6246bc436f
] [formerlyf7e6badd78
[formerly4a56039409
]]] [formerlyb9951098fb
[formerly81d285898e
] [formerlybb34629983
[formerly13ef5cc298
]] [formerly9ab9f3457d
[formerly7ddec8785b
] [formerly42efd4d2ec
[formerly8eb58743bb
]]]] Former-commit-id:a1afe3cce5
[formerly91fab9355d
] [formerlydc19a95fa7
[formerlyb4e22f8abd
]] [formerlyb2f2b27610
[formerlyaa7160ec36
] [formerly67a7bef302
[formerlyc3f9e539d4
]]] Former-commit-id:8a4a143a49
[formerly499606f224
] [formerly98e3acbe03
[formerly042fc1dde9
]] Former-commit-id:659c4020eb
[formerly925f9c9262
] Former-commit-id:9686f047ab
@@ -0,0 +1,98 @@ | |||
import os.path | |||
import unittest | |||
from d3m import container, utils | |||
from d3m.metadata import base as metadata_base | |||
from tods.data_processing import DatasetToDataframe, ColumnParser | |||
import utils as test_utils | |||
class ColumnParserPrimitiveTestCase(unittest.TestCase):
    """Exercise ``ColumnParserPrimitive`` on the yahoo_sub_5 TRAIN dataset.

    The dataset is first converted with ``DatasetToDataFramePrimitive`` and the
    resulting frame is passed through ``ColumnParserPrimitive``. The test then
    checks the first row's values and Python types, and the metadata attached
    to the parsed frame.
    """

    def test_basic(self):
        """Parse the dataframe and verify first-row values, types and metadata."""
        dataset_doc_path = os.path.abspath(os.path.join(
            os.path.dirname(__file__), '..', '..', '..', 'datasets', 'anomaly',
            'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json',
        ))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # Step 1: dataset -> dataframe, using the primitive's default hyper-parameters.
        hyperparams_class = DatasetToDataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams()
        primitive = DatasetToDataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults())
        dataframe = primitive.produce(inputs=dataset).value

        # Step 2: run the column parser, again with default hyper-parameters.
        hyperparams_class = ColumnParser.ColumnParserPrimitive.metadata.get_hyperparams()
        primitive = ColumnParser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults())
        dataframe = primitive.produce(inputs=dataframe).value

        first_row = list(dataframe.itertuples(index=False, name=None))[0]

        self.assertEqual(first_row, (0, 1, 12183.0, 0.0, 3.7166666666667, 5.0, 2109.0, 0))
        self.assertEqual([type(o) for o in first_row], [int, int, float, float, float, float, float, int])

        self._test_basic_metadata(dataframe.metadata)

    def _test_basic_metadata(self, metadata):
        """Assert table-, column-dimension- and per-column metadata of the parsed frame.

        ``metadata`` is the ``metadata`` attribute of the dataframe produced by
        ``ColumnParserPrimitive`` in ``test_basic``.
        """
        self.maxDiff = None

        self.assertEqual(test_utils.convert_through_json(metadata.query(())), {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [
                'https://metadata.datadrivendiscovery.org/types/Table',
            ],
            'dimension': {
                'name': 'rows',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                'length': 1260,
            }
        })

        self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), {
            'dimension': {
                'name': 'columns',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                'length': 8,
            }
        })

        integer = 'http://schema.org/Integer'
        floating = 'http://schema.org/Float'
        attribute = 'https://metadata.datadrivendiscovery.org/types/Attribute'

        # One entry per column, in column order: (name, structural type, semantic types).
        expected_columns = [
            ('d3mIndex', 'int', [integer, 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']),
            ('timestamp', 'int', [integer, attribute]),
            ('value_0', 'float', [floating, attribute]),
            ('value_1', 'float', [floating, attribute]),
            ('value_2', 'float', [floating, attribute]),
            ('value_3', 'float', [floating, attribute]),
            ('value_4', 'float', [floating, attribute]),
            ('ground_truth', 'int', [integer, 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', attribute]),
        ]

        for column_index, (name, structural_type, semantic_types) in enumerate(expected_columns):
            # subTest keeps checking the remaining columns after a mismatch, so a
            # single run reports every column whose metadata is wrong.
            with self.subTest(column=name):
                self.assertEqual(
                    test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, column_index))),
                    {'name': name, 'structural_type': structural_type, 'semantic_types': semantic_types},
                )
if __name__ == '__main__':
    # Run this module's test cases when the file is executed directly.
    unittest.main()
@@ -0,0 +1,131 @@ | |||
import copy | |||
import os | |||
import unittest | |||
import numpy | |||
from d3m import container | |||
from d3m.metadata import base as metadata_base | |||
from tods.data_processing import DatasetToDataframe , ConstructPredictions , ExtractColumnsBySemanticTypes | |||
import utils as test_utils | |||
class ConstructPredictionsPrimitiveTestCase(unittest.TestCase):
    """Exercise ``ConstructPredictionsPrimitive`` on the yahoo_sub_5 TRAIN dataset.

    Column 5 of ``learningData`` is marked as the target, the primary key and
    target columns are extracted, relabelled as predictions, deliberately
    swapped, and then handed to ``ConstructPredictionsPrimitive`` together with
    the full dataframe as reference.
    """

    # TODO: Make this part of metadata API.
    #       Something like setting a semantic type for given columns.
    def _mark_all_targets(self, dataset, targets):
        """Turn each listed column from an attribute into a (true) target.

        ``targets`` is an iterable of dicts with ``resource_id`` and
        ``column_index`` keys. ``dataset.metadata`` is reassigned after every
        individual change.
        """
        for entry in targets:
            column_selector = (entry['resource_id'], metadata_base.ALL_ELEMENTS, entry['column_index'])
            dataset.metadata = dataset.metadata.add_semantic_type(
                column_selector, 'https://metadata.datadrivendiscovery.org/types/Target',
            )
            dataset.metadata = dataset.metadata.add_semantic_type(
                column_selector, 'https://metadata.datadrivendiscovery.org/types/TrueTarget',
            )
            dataset.metadata = dataset.metadata.remove_semantic_type(
                column_selector, 'https://metadata.datadrivendiscovery.org/types/Attribute',
            )

    def _get_yahoo_dataframe(self):
        """Load yahoo_sub_5 TRAIN, mark column 5 as target, return it as a dataframe."""
        tests_dir = os.path.dirname(__file__)
        doc_path = os.path.abspath(os.path.join(
            tests_dir, '..', '..', '..', 'datasets', 'anomaly', 'yahoo_sub_5',
            'TRAIN', 'dataset_TRAIN', 'datasetDoc.json',
        ))
        yahoo_dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=doc_path))

        self._mark_all_targets(yahoo_dataset, [{'resource_id': 'learningData', 'column_index': 5}])

        to_dataframe_cls = DatasetToDataframe.DatasetToDataFramePrimitive
        defaults = to_dataframe_cls.metadata.get_hyperparams().defaults()
        converter = to_dataframe_cls(hyperparams=defaults)
        return converter.produce(inputs=yahoo_dataset).value

    def test_correct_order(self):
        """Swapped prediction columns must come back as ``d3mIndex`` then ``value_3``."""
        reference_frame = self._get_yahoo_dataframe()

        # We extract both the primary index and targets. So it is in the output format already.
        extract_cls = ExtractColumnsBySemanticTypes.ExtractColumnsBySemanticTypesPrimitive
        extract_hyperparams = extract_cls.metadata.get_hyperparams().defaults().replace({
            'semantic_types': (
                'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
                'https://metadata.datadrivendiscovery.org/types/Target',
            ),
        })
        extractor = extract_cls(hyperparams=extract_hyperparams)
        targets = extractor.produce(inputs=reference_frame).value

        # We pretend these are our predictions.
        prediction_column = (metadata_base.ALL_ELEMENTS, 1)
        targets.metadata = targets.metadata.remove_semantic_type(
            prediction_column, 'https://metadata.datadrivendiscovery.org/types/TrueTarget',
        )
        targets.metadata = targets.metadata.add_semantic_type(
            prediction_column, 'https://metadata.datadrivendiscovery.org/types/PredictedTarget',
        )

        # We switch columns around.
        targets = targets.select_columns([1, 0])

        construct_cls = ConstructPredictions.ConstructPredictionsPrimitive
        construct_primitive = construct_cls(hyperparams=construct_cls.metadata.get_hyperparams().defaults())
        predictions = construct_primitive.produce(inputs=targets, reference=reference_frame).value

        self.assertEqual(list(predictions.columns), ['d3mIndex', 'value_3'])
        self._test_metadata(predictions.metadata)

    def _test_metadata(self, metadata, no_metadata=False):
        """Assert the metadata of the constructed predictions frame.

        With ``no_metadata`` set, the target column is expected to lack the
        ``http://schema.org/Float`` semantic type; otherwise that type is
        expected first in the list.
        """
        self.maxDiff = None

        expected_table = {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [
                'https://metadata.datadrivendiscovery.org/types/Table',
            ],
            'dimension': {
                'name': 'rows',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                'length': 1260,
            },
        }
        self.assertEqual(test_utils.convert_through_json(metadata.query(())), expected_table)

        expected_columns_dimension = {
            'dimension': {
                'name': 'columns',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                'length': 2,
            },
        }
        self.assertEqual(
            test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))),
            expected_columns_dimension,
        )

        expected_index_column = {
            'name': 'd3mIndex',
            'structural_type': 'str',
            'semantic_types': [
                'http://schema.org/Integer',
                'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
            ],
        }
        self.assertEqual(
            test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))),
            expected_index_column,
        )

        # The two variants differ only in whether the Float type leads the list.
        target_semantic_types = [
            'https://metadata.datadrivendiscovery.org/types/Target',
            'https://metadata.datadrivendiscovery.org/types/PredictedTarget',
        ]
        if not no_metadata:
            target_semantic_types.insert(0, 'http://schema.org/Float')

        self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1))), {
            'name': 'value_3',
            'structural_type': 'str',
            'semantic_types': target_semantic_types,
        })
if __name__ == '__main__':
    # Run this module's test cases when the file is executed directly.
    unittest.main()
@@ -0,0 +1,80 @@ | |||
import os.path | |||
import unittest | |||
from d3m import container, utils | |||
from d3m.metadata import base as metadata_base | |||
from tods.data_processing import DatasetToDataframe | |||
import utils as test_utils | |||
class ColumnParserPrimitiveTestCase(unittest.TestCase):
    """Exercise ``DatasetToDataFramePrimitive`` on the yahoo_sub_5 TRAIN dataset.

    NOTE(review): despite the class name, the only primitive used in this test
    case is ``DatasetToDataFramePrimitive``; the name looks copied from the
    column-parser test and may deserve a rename.
    """

    def test_basic(self):
        """Convert the dataset and verify the first row is all strings."""
        here = os.path.dirname(__file__)
        doc_path = os.path.abspath(os.path.join(
            here, '..', '..', '..', 'datasets', 'anomaly', 'yahoo_sub_5',
            'TRAIN', 'dataset_TRAIN', 'datasetDoc.json',
        ))
        yahoo_dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=doc_path))

        primitive_cls = DatasetToDataframe.DatasetToDataFramePrimitive
        converter = primitive_cls(hyperparams=primitive_cls.metadata.get_hyperparams().defaults())
        frame = converter.produce(inputs=yahoo_dataset).value

        rows = list(frame.itertuples(index=False, name=None))
        first_row = rows[0]

        # No parsing has happened yet, so every cell is still the raw string.
        self.assertEqual(first_row, ('0', '1', '12183', '0.0', '3.7166666666667', '5', '2109', '0'))
        self.assertEqual([type(cell) for cell in first_row], [str] * 8)

        self._test_basic_metadata(frame.metadata)

    def _test_basic_metadata(self, metadata):
        """Assert table-, column-dimension- and per-column metadata of the frame."""
        self.maxDiff = None

        expected_table = {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [
                'https://metadata.datadrivendiscovery.org/types/Table',
            ],
            'dimension': {
                'name': 'rows',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                'length': 1260,
            },
        }
        self.assertEqual(test_utils.convert_through_json(metadata.query(())), expected_table)

        expected_columns_dimension = {
            'dimension': {
                'name': 'columns',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                'length': 8,
            },
        }
        self.assertEqual(
            test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))),
            expected_columns_dimension,
        )

        integer = 'http://schema.org/Integer'
        floating = 'http://schema.org/Float'
        attribute = 'https://metadata.datadrivendiscovery.org/types/Attribute'

        # (name, semantic types) per column, in column order; every column is
        # expected to have structural type 'str'.
        expected_columns = (
            ('d3mIndex', [integer, 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']),
            ('timestamp', [integer, attribute]),
            ('value_0', [floating, attribute]),
            ('value_1', [floating, attribute]),
            ('value_2', [floating, attribute]),
            ('value_3', [floating, attribute]),
            ('value_4', [floating, attribute]),
            ('ground_truth', [integer, 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', attribute]),
        )

        for position, (column_name, semantic_types) in enumerate(expected_columns):
            actual = test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, position)))
            self.assertEqual(actual, {
                'name': column_name,
                'structural_type': 'str',
                'semantic_types': semantic_types,
            })
if __name__ == '__main__':
    # Run this module's test cases when the file is executed directly.
    unittest.main()
@@ -0,0 +1,107 @@ | |||
import os.path | |||
import unittest | |||
from d3m import container, utils | |||
from d3m.metadata import base as metadata_base | |||
from tods.data_processing import DatasetToDataframe, ExtractColumnsBySemanticTypes | |||
import utils as test_utils | |||
class ExtractColumnsBySemanticTypePrimitiveTestCase(unittest.TestCase):
    """Exercise ``ExtractColumnsBySemanticTypesPrimitive`` on yahoo_sub_5 TRAIN.

    Column 7 of ``learningData`` is turned from an attribute into a target,
    the dataset is converted to a dataframe, and only the ``Attribute`` and
    ``PrimaryKey`` columns are extracted. The test checks the metadata of the
    seven columns that remain.
    """

    def test_basic(self):
        """Extract attribute and primary-key columns and verify their metadata."""
        dataset_doc_path = os.path.abspath(os.path.join(
            os.path.dirname(__file__), '..', '..', '..', 'datasets', 'anomaly',
            'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json',
        ))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        target_selector = ('learningData', metadata_base.ALL_ELEMENTS, 7)
        dataset.metadata = dataset.metadata.add_semantic_type(target_selector, 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(target_selector, 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(target_selector, 'https://metadata.datadrivendiscovery.org/types/Attribute')

        hyperparams_class = DatasetToDataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams()
        primitive = DatasetToDataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults())
        dataframe = primitive.produce(inputs=dataset).value

        hyperparams_class = ExtractColumnsBySemanticTypes.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams()
        primitive = ExtractColumnsBySemanticTypes.ExtractColumnsBySemanticTypesPrimitive(
            hyperparams=hyperparams_class.defaults().replace({
                'semantic_types': (
                    'https://metadata.datadrivendiscovery.org/types/Attribute',
                    'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
                ),
            }),
        )
        dataframe = primitive.produce(inputs=dataframe).value

        self._test_metadata(dataframe.metadata)

    def _test_metadata(self, metadata):
        """Assert table-, column-dimension- and per-column metadata of the extracted frame."""
        self.maxDiff = None

        self.assertEqual(test_utils.convert_through_json(metadata.query(())), {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [
                'https://metadata.datadrivendiscovery.org/types/Table',
            ],
            'dimension': {
                'name': 'rows',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                'length': 1260,
            }
        })

        self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), {
            'dimension': {
                'name': 'columns',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                'length': 7,
            }
        })

        integer = 'http://schema.org/Integer'
        floating = 'http://schema.org/Float'
        attribute = 'https://metadata.datadrivendiscovery.org/types/Attribute'

        # (name, semantic types) per remaining column, in column order; every
        # column is expected to have structural type 'str'.
        expected_columns = [
            ('d3mIndex', [integer, 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']),
            ('timestamp', [integer, attribute]),
            ('value_0', [floating, attribute]),
            ('value_1', [floating, attribute]),
            ('value_2', [floating, attribute]),
            ('value_3', [floating, attribute]),
            ('value_4', [floating, attribute]),
        ]

        for column_index, (name, semantic_types) in enumerate(expected_columns):
            # subTest keeps checking the remaining columns after a mismatch, so a
            # single run reports every column whose metadata is wrong.
            with self.subTest(column=name):
                self.assertEqual(
                    test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, column_index))),
                    {'name': name, 'structural_type': 'str', 'semantic_types': semantic_types},
                )
if __name__ == '__main__':
    # Run this module's test cases when the file is executed directly.
    unittest.main()
@@ -0,0 +1,92 @@ | |||
import unittest | |||
import numpy | |||
from d3m import container, utils | |||
from d3m.metadata import base as metadata_base | |||
from tods.data_processing import SKImputer | |||
class SkImputerTestCase(unittest.TestCase):
    """Exercise ``SKImputerPrimitive`` on a small in-memory dataframe."""

    def test_basic(self):
        """Fit and produce on a 4x3 frame with one NaN; verify values and metadata."""
        main = container.DataFrame(
            {'timestamp': [1, 2, 3, 5], 'a': [numpy.nan, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0]},
            columns=['timestamp', 'a', 'b'],
            generate_metadata=True,
        )

        # Metadata generated for the input frame.
        self.assertEqual(utils.to_json_structure(main.metadata.to_internal_simple_structure()), [{
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 4,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 3,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {'structural_type': 'numpy.int64', 'name': 'timestamp'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {'structural_type': 'numpy.float64', 'name': 'a'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 2],
            'metadata': {'structural_type': 'numpy.float64', 'name': 'b'},
        }])

        hyperparams_class = SKImputer.SKImputerPrimitive.metadata.get_hyperparams()
        primitive = SKImputer.SKImputerPrimitive(hyperparams=hyperparams_class.defaults())
        primitive.set_training_data(inputs=main)
        primitive.fit()

        output_main = primitive.produce(inputs=main).value

        # The NaN in column 'a' is expected to be replaced by 3.0; all other
        # cells must be unchanged.
        column_names = ['timestamp', 'a', 'b']
        expected_output = container.DataFrame({'timestamp': [1, 2, 3, 5], 'a': [3.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0]})
        self.assertEqual(
            output_main[column_names].values.tolist(),
            expected_output[column_names].values.tolist(),
        )

        # Metadata of the produced frame: every column is expected to be
        # numpy.float64 and to carry the Attribute semantic type.
        attribute = 'https://metadata.datadrivendiscovery.org/types/Attribute'
        self.assertEqual(utils.to_json_structure(output_main.metadata.to_internal_simple_structure()), [{
            'selector': [],
            'metadata': {
                # Same constant as used for the input expectation above,
                # instead of a hard-coded copy of the schema URL.
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 4,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 3,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {'name': 'timestamp', 'semantic_types': [attribute], 'structural_type': 'numpy.float64'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {'name': 'a', 'semantic_types': [attribute], 'structural_type': 'numpy.float64'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 2],
            'metadata': {'name': 'b', 'semantic_types': [attribute], 'structural_type': 'numpy.float64'},
        }])

        # Smoke check: fitted params must round-trip through get/set without raising.
        params = primitive.get_params()
        primitive.set_params(params=params)
if __name__ == '__main__':
    # Run this module's test cases when the file is executed directly.
    unittest.main()