Browse Source

test cases added

Former-commit-id: 46d43eb3b5 [formerly 1d4e5b5978] [formerly a7894f60c8 [formerly 86280ce9c9]] [formerly 7686434aa5 [formerly e5c8bc392f] [formerly 2076f136ea [formerly c5cfc9d1bb]]] [formerly a3a7cc3fe7 [formerly 6633595b58] [formerly a772904a10 [formerly 5bfb3d73e1]] [formerly 86ac3f80fb [formerly 35bc6c859c] [formerly dce21b3e9f [formerly f50fde7be7]]]] [formerly f3e5247676 [formerly 84725abec9] [formerly 4c25a20f65 [formerly 35d1f43703]] [formerly 6964458a6c [formerly a1b9b56b0b] [formerly 0b36b85970 [formerly 846ad9e311]]] [formerly 10b203765c [formerly 69ef8d335b] [formerly e568fea250 [formerly c8a7e5a298]] [formerly 55d575e702 [formerly 5d41bad9ce] [formerly 4f609302d6 [formerly 8e4b5ee9eb]]]]] [formerly 0761ffdc45 [formerly c971130f03] [formerly 5165328709 [formerly b3ba5008d7]] [formerly b4c9a49bb0 [formerly 802c0e4689] [formerly e190c9f527 [formerly f5f784b68e]]] [formerly ae73ed9c36 [formerly a8ebcc4350] [formerly 480f95ca28 [formerly 8ba2b06eac]] [formerly 7dfc73aa22 [formerly d93cbb45c8] [formerly c68dfe583b [formerly c1fc0f62b3]]]] [formerly 82a666a3be [formerly 72bcd362be] [formerly 33282bc59c [formerly 51a8423b9b]] [formerly 637d5cf49e [formerly 88aa198ea9] [formerly 04cd6d589c [formerly 25b48a24bb]]] [formerly b7bc4c1916 [formerly 33aba55991] [formerly 253bffa715 [formerly 774a63133b]] [formerly 61678b32fc [formerly 54a92a4646] [formerly cd7e572aee [formerly 12507d77c8]]]]]]
Former-commit-id: be11f7e68b [formerly cf5d9f6bac] [formerly 192df6ba14 [formerly c78dd9a770]] [formerly e9599f9340 [formerly 5fa597d99e] [formerly 1c0a81ef4a [formerly fdb69b1b99]]] [formerly 2aaa17cc05 [formerly 9d981eabc2] [formerly 1cb91b70d6 [formerly a6b6898d45]] [formerly 46915532fa [formerly e63b6759a8] [formerly 507bf27884 [formerly 4787017cab]]]] [formerly 7e60834b56 [formerly 0fe096fe46] [formerly 8ece7e0be0 [formerly 126ce647c6]] [formerly 0f1dea464e [formerly 4bea559051] [formerly 9f4af75ee3 [formerly e51c173ac4]]] [formerly 5290099d42 [formerly 4f98c634c1] [formerly b5661390c0 [formerly 01f4dc5b81]] [formerly 1ffc2ce3b2 [formerly b6cf5f21e8] [formerly cd7e572aee]]]]
Former-commit-id: e00e93f654 [formerly f465f93152] [formerly 96bf6cd1e9 [formerly e3cb872e95]] [formerly 7bbf6de45a [formerly 6246bc436f] [formerly f7e6badd78 [formerly 4a56039409]]] [formerly b9951098fb [formerly 81d285898e] [formerly bb34629983 [formerly 13ef5cc298]] [formerly 9ab9f3457d [formerly 7ddec8785b] [formerly 42efd4d2ec [formerly 8eb58743bb]]]]
Former-commit-id: a1afe3cce5 [formerly 91fab9355d] [formerly dc19a95fa7 [formerly b4e22f8abd]] [formerly b2f2b27610 [formerly aa7160ec36] [formerly 67a7bef302 [formerly c3f9e539d4]]]
Former-commit-id: 8a4a143a49 [formerly 499606f224] [formerly 98e3acbe03 [formerly 042fc1dde9]]
Former-commit-id: 659c4020eb [formerly 925f9c9262]
Former-commit-id: 9686f047ab
master
Devesh Kumar 4 years ago
parent
commit
f8d5668dc1
5 changed files with 508 additions and 0 deletions
  1. +98
    -0
      tods/tests/data_processing/test_ColumnParser.py
  2. +131
    -0
      tods/tests/data_processing/test_ConstructPredictions.py
  3. +80
    -0
      tods/tests/data_processing/test_DatasetToDataFrame.py
  4. +107
    -0
      tods/tests/data_processing/test_ExtractColumnsBySemanticTypes.py
  5. +92
    -0
      tods/tests/data_processing/test_SKImputer.py

+ 98
- 0
tods/tests/data_processing/test_ColumnParser.py View File

@@ -0,0 +1,98 @@

import os.path
import unittest



from d3m import container, utils
from d3m.metadata import base as metadata_base

from tods.data_processing import DatasetToDataframe, ColumnParser

import utils as test_utils


class ColumnParserPrimitiveTestCase(unittest.TestCase):
    """Exercise ``ColumnParserPrimitive`` on the yahoo_sub_5 TRAIN dataset."""

    def test_basic(self):
        """Parse the dataset's DataFrame and check the first row's values, types and metadata."""
        dataset_doc_path = os.path.abspath(os.path.join(
            os.path.dirname(__file__), '..', '..', '..', 'datasets', 'anomaly',
            'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json',
        ))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # Step 1: Dataset -> DataFrame with default hyper-parameters.
        to_dataframe_hyperparams = DatasetToDataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams()
        to_dataframe = DatasetToDataframe.DatasetToDataFramePrimitive(hyperparams=to_dataframe_hyperparams.defaults())
        dataframe = to_dataframe.produce(inputs=dataset).value

        # Step 2: run the column parser (the primitive under test) with defaults.
        parser_hyperparams = ColumnParser.ColumnParserPrimitive.metadata.get_hyperparams()
        parser = ColumnParser.ColumnParserPrimitive(hyperparams=parser_hyperparams.defaults())
        dataframe = parser.produce(inputs=dataframe).value

        # Only the first row is inspected, so there is no need to materialize all rows.
        first_row = next(iter(dataframe.itertuples(index=False, name=None)))

        self.assertEqual(first_row, (0, 1, 12183.0, 0.0, 3.7166666666667, 5.0, 2109.0, 0))
        self.assertEqual([type(o) for o in first_row], [int, int, float, float, float, float, float, int])

        self._test_basic_metadata(dataframe.metadata)

    def _test_basic_metadata(self, metadata):
        """Assert the table-level, column-dimension and per-column metadata of the parsed DataFrame.

        ``metadata`` is the ``.metadata`` attribute of the DataFrame produced in ``test_basic``.
        """
        self.maxDiff = None

        self.assertEqual(test_utils.convert_through_json(metadata.query(())), {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [
                'https://metadata.datadrivendiscovery.org/types/Table',
            ],
            'dimension': {
                'name': 'rows',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                'length': 1260,
            },
        })

        self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), {
            'dimension': {
                'name': 'columns',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                'length': 8,
            },
        })

        # One entry per column, in column order: (name, structural type, semantic types).
        # Column 0 used to be asserted twice; each column is now checked exactly once.
        expected_columns = [
            ('d3mIndex', 'int', ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']),
            ('timestamp', 'int', ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_0', 'float', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_1', 'float', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_2', 'float', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_3', 'float', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_4', 'float', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('ground_truth', 'int', ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
        ]

        for column_index, (name, structural_type, semantic_types) in enumerate(expected_columns):
            # subTest labels a failure with the column name and keeps checking the remaining columns.
            with self.subTest(column=name):
                self.assertEqual(
                    test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, column_index))),
                    {'name': name, 'structural_type': structural_type, 'semantic_types': semantic_types},
                )



# Run this module's tests when the file is executed directly.
if __name__ == '__main__':
    unittest.main()

+ 131
- 0
tods/tests/data_processing/test_ConstructPredictions.py View File

@@ -0,0 +1,131 @@
import copy
import os
import unittest

import numpy

from d3m import container
from d3m.metadata import base as metadata_base

from tods.data_processing import DatasetToDataframe , ConstructPredictions , ExtractColumnsBySemanticTypes

import utils as test_utils


class ConstructPredictionsPrimitiveTestCase(unittest.TestCase):
    """Exercise ``ConstructPredictionsPrimitive`` on the yahoo_sub_5 TRAIN dataset."""

    # TODO: Make this part of metadata API.
    # Something like setting a semantic type for given columns.
    def _mark_all_targets(self, dataset, targets):
        """Tag each listed column as Target/TrueTarget and drop its Attribute type.

        ``targets`` is an iterable of dicts with ``resource_id`` and ``column_index`` keys.
        ``dataset.metadata`` is reassigned in place on the passed dataset.
        """
        for entry in targets:
            column_selector = (entry['resource_id'], metadata_base.ALL_ELEMENTS, entry['column_index'])
            dataset.metadata = dataset.metadata.add_semantic_type(column_selector, 'https://metadata.datadrivendiscovery.org/types/Target')
            dataset.metadata = dataset.metadata.add_semantic_type(column_selector, 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
            dataset.metadata = dataset.metadata.remove_semantic_type(column_selector, 'https://metadata.datadrivendiscovery.org/types/Attribute')

    def _get_yahoo_dataframe(self):
        """Load the dataset, mark column 5 of ``learningData`` as the target, and return it as a DataFrame."""
        here = os.path.dirname(__file__)
        dataset_doc_path = os.path.abspath(os.path.join(here, '..', '..', '..', 'datasets', 'anomaly', 'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json'))

        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))
        self._mark_all_targets(dataset, [{'resource_id': 'learningData', 'column_index': 5}])

        primitive_class = DatasetToDataframe.DatasetToDataFramePrimitive
        default_hyperparams = primitive_class.metadata.get_hyperparams().defaults()
        return primitive_class(hyperparams=default_hyperparams).produce(inputs=dataset).value

    def test_correct_order(self):
        """Predictions given in swapped column order come back as ``['d3mIndex', 'value_3']``."""
        reference = self._get_yahoo_dataframe()

        # We extract both the primary index and targets. So it is in the output format already.
        extractor_class = ExtractColumnsBySemanticTypes.ExtractColumnsBySemanticTypesPrimitive
        wanted_types = ('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'https://metadata.datadrivendiscovery.org/types/Target',)
        extractor_hyperparams = extractor_class.metadata.get_hyperparams().defaults().replace({'semantic_types': wanted_types})
        predictions = extractor_class(hyperparams=extractor_hyperparams).produce(inputs=reference).value

        # We pretend these are our predictions.
        target_selector = (metadata_base.ALL_ELEMENTS, 1)
        predictions.metadata = predictions.metadata.remove_semantic_type(target_selector, 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        predictions.metadata = predictions.metadata.add_semantic_type(target_selector, 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')

        # We switch columns around.
        predictions = predictions.select_columns([1, 0])

        constructor_class = ConstructPredictions.ConstructPredictionsPrimitive
        constructor = constructor_class(hyperparams=constructor_class.metadata.get_hyperparams().defaults())
        result = constructor.produce(inputs=predictions, reference=reference).value

        self.assertEqual(list(result.columns), ['d3mIndex', 'value_3'])

        self._test_metadata(result.metadata)

    def _test_metadata(self, metadata, no_metadata=False):
        """Assert the metadata of the constructed predictions DataFrame.

        When ``no_metadata`` is true the target column is expected to lack the
        ``http://schema.org/Float`` semantic type; otherwise that type comes first.
        """
        self.maxDiff = None

        table_metadata = test_utils.convert_through_json(metadata.query(()))
        self.assertEqual(table_metadata, {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [
                'https://metadata.datadrivendiscovery.org/types/Table',
            ],
            'dimension': {
                'name': 'rows',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                'length': 1260,
            },
        })

        columns_metadata = test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,)))
        self.assertEqual(columns_metadata, {
            'dimension': {
                'name': 'columns',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                'length': 2,
            },
        })

        index_metadata = test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0)))
        self.assertEqual(index_metadata, {
            'name': 'd3mIndex',
            'structural_type': 'str',
            'semantic_types': [
                'http://schema.org/Integer',
                'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
            ],
        })

        # The two variants differ only in whether the Float type leads the list.
        target_semantic_types = [
            'https://metadata.datadrivendiscovery.org/types/Target',
            'https://metadata.datadrivendiscovery.org/types/PredictedTarget',
        ]
        if not no_metadata:
            target_semantic_types.insert(0, 'http://schema.org/Float')

        target_metadata = test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1)))
        self.assertEqual(target_metadata, {
            'name': 'value_3',
            'structural_type': 'str',
            'semantic_types': target_semantic_types,
        })



# Run this module's tests when the file is executed directly.
if __name__ == '__main__':
    unittest.main()

+ 80
- 0
tods/tests/data_processing/test_DatasetToDataFrame.py View File

@@ -0,0 +1,80 @@
import os.path
import unittest



from d3m import container, utils
from d3m.metadata import base as metadata_base

from tods.data_processing import DatasetToDataframe

import utils as test_utils


# NOTE(review): this class tests DatasetToDataFramePrimitive, yet it carries the
# ColumnParser test's name - looks like a copy-paste leftover. Kept unchanged here;
# consider renaming in a dedicated change.
class ColumnParserPrimitiveTestCase(unittest.TestCase):
    """Exercise ``DatasetToDataFramePrimitive`` on the yahoo_sub_5 TRAIN dataset."""

    def test_basic(self):
        """Convert the dataset to a DataFrame and check that the first row holds only strings."""
        tests_dir = os.path.dirname(__file__)
        dataset_doc_path = os.path.abspath(os.path.join(tests_dir, '..', '..', '..', 'datasets', 'anomaly', 'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json'))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        primitive_class = DatasetToDataframe.DatasetToDataFramePrimitive
        converter = primitive_class(hyperparams=primitive_class.metadata.get_hyperparams().defaults())
        result = converter.produce(inputs=dataset)
        frame = result.value

        rows = list(frame.itertuples(index=False, name=None))
        first_row = rows[0]

        self.assertEqual(first_row, ('0', '1', '12183', '0.0', '3.7166666666667', '5', '2109', '0'))
        self.assertEqual([type(o) for o in first_row], [str, str, str, str, str, str, str, str])

        self._test_basic_metadata(frame.metadata)

    def _test_basic_metadata(self, metadata):
        """Assert the table-level, column-dimension and per-column metadata of the DataFrame."""
        self.maxDiff = None

        table_metadata = test_utils.convert_through_json(metadata.query(()))
        self.assertEqual(table_metadata, {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [
                'https://metadata.datadrivendiscovery.org/types/Table',
            ],
            'dimension': {
                'name': 'rows',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                'length': 1260,
            },
        })

        columns_metadata = test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,)))
        self.assertEqual(columns_metadata, {
            'dimension': {
                'name': 'columns',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                'length': 8,
            },
        })

        # (name, semantic types) per column, in column order; every column is still a 'str' here.
        expected_columns = (
            ('d3mIndex', ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']),
            ('timestamp', ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_0', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_1', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_2', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_3', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_4', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('ground_truth', ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
        )

        for position, (column_name, semantic_types) in enumerate(expected_columns):
            actual = test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, position)))
            self.assertEqual(actual, {'name': column_name, 'structural_type': 'str', 'semantic_types': semantic_types})



# Run this module's tests when the file is executed directly.
if __name__ == '__main__':
    unittest.main()

+ 107
- 0
tods/tests/data_processing/test_ExtractColumnsBySemanticTypes.py View File

@@ -0,0 +1,107 @@
import os.path
import unittest



from d3m import container, utils
from d3m.metadata import base as metadata_base

from tods.data_processing import DatasetToDataframe, ExtractColumnsBySemanticTypes

import utils as test_utils


class ExtractColumnsBySemanticTypePrimitiveTestCase(unittest.TestCase):
    """Exercise ``ExtractColumnsBySemanticTypesPrimitive`` on the yahoo_sub_5 TRAIN dataset."""

    def test_basic(self):
        """Extract Attribute and PrimaryKey columns and verify the resulting metadata."""
        dataset_doc_path = os.path.abspath(os.path.join(
            os.path.dirname(__file__), '..', '..', '..', 'datasets', 'anomaly',
            'yahoo_sub_5', 'TRAIN', 'dataset_TRAIN', 'datasetDoc.json',
        ))
        dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

        # We set semantic types like runtime would.
        target_selector = ('learningData', metadata_base.ALL_ELEMENTS, 7)
        dataset.metadata = dataset.metadata.add_semantic_type(target_selector, 'https://metadata.datadrivendiscovery.org/types/Target')
        dataset.metadata = dataset.metadata.add_semantic_type(target_selector, 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
        dataset.metadata = dataset.metadata.remove_semantic_type(target_selector, 'https://metadata.datadrivendiscovery.org/types/Attribute')

        to_dataframe_hyperparams = DatasetToDataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams()
        to_dataframe = DatasetToDataframe.DatasetToDataFramePrimitive(hyperparams=to_dataframe_hyperparams.defaults())
        dataframe = to_dataframe.produce(inputs=dataset).value

        extract_hyperparams = ExtractColumnsBySemanticTypes.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams()
        extractor = ExtractColumnsBySemanticTypes.ExtractColumnsBySemanticTypesPrimitive(
            hyperparams=extract_hyperparams.defaults().replace({
                'semantic_types': (
                    'https://metadata.datadrivendiscovery.org/types/Attribute',
                    'https://metadata.datadrivendiscovery.org/types/PrimaryKey',
                ),
            }),
        )
        dataframe = extractor.produce(inputs=dataframe).value

        self._test_metadata(dataframe.metadata)

    def _test_metadata(self, metadata):
        """Assert the table-level, column-dimension and per-column metadata of the extracted DataFrame.

        Seven columns are expected: column 7 had its Attribute type removed in
        ``test_basic`` and is not among the asserted columns.
        """
        self.maxDiff = None

        self.assertEqual(test_utils.convert_through_json(metadata.query(())), {
            'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
            'structural_type': 'd3m.container.pandas.DataFrame',
            'semantic_types': [
                'https://metadata.datadrivendiscovery.org/types/Table',
            ],
            'dimension': {
                'name': 'rows',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                'length': 1260,
            },
        })

        self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), {
            'dimension': {
                'name': 'columns',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                'length': 7,
            },
        })

        # (name, semantic types) per column, in column order; all values are still 'str'.
        # Column 0 used to be asserted twice; each column is now checked exactly once.
        expected_columns = [
            ('d3mIndex', ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']),
            ('timestamp', ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_0', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_1', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_2', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_3', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
            ('value_4', ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute']),
        ]

        for column_index, (name, semantic_types) in enumerate(expected_columns):
            # subTest labels a failure with the column name and keeps checking the remaining columns.
            with self.subTest(column=name):
                self.assertEqual(
                    test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, column_index))),
                    {'name': name, 'structural_type': 'str', 'semantic_types': semantic_types},
                )





# Run this module's tests when the file is executed directly.
if __name__ == '__main__':
    unittest.main()

+ 92
- 0
tods/tests/data_processing/test_SKImputer.py View File

@@ -0,0 +1,92 @@
import unittest
import numpy
from d3m import container, utils
from d3m.metadata import base as metadata_base

from tods.data_processing import SKImputer


class SkImputerTestCase(unittest.TestCase):
    """Exercise ``SKImputerPrimitive`` on a small in-memory DataFrame with one missing value."""

    def test_basic(self):
        """Fit and apply the imputer with default hyper-parameters, then check values and metadata."""
        main = container.DataFrame(
            {'timestamp': [1, 2, 3, 5], 'a': [numpy.nan, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0]},
            columns=['timestamp', 'a', 'b'],
            generate_metadata=True,
        )

        # Sanity-check the generated input metadata before running the primitive.
        self.assertEqual(utils.to_json_structure(main.metadata.to_internal_simple_structure()), [{
            'selector': [],
            'metadata': {
                # 'top_level': 'main',
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 4,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 3,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {'structural_type': 'numpy.int64', 'name': 'timestamp'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {'structural_type': 'numpy.float64', 'name': 'a'},
        }, {
            'selector': ['__ALL_ELEMENTS__', 2],
            'metadata': {'structural_type': 'numpy.float64', 'name': 'b'},
        }])

        hyperparams_class = SKImputer.SKImputerPrimitive.metadata.get_hyperparams()

        primitive = SKImputer.SKImputerPrimitive(hyperparams=hyperparams_class.defaults())
        primitive.set_training_data(inputs=main)
        primitive.fit()
        output_main = primitive.produce(inputs=main).value

        # The missing 'a' value is expected to become 3.0.
        # NOTE(review): 3.0 is both the mean and the median of the observed values (2, 3, 4),
        # so this test does not tell the default strategy apart - confirm against the hyperparams.
        expected_output = container.DataFrame({'timestamp': [1, 2, 3, 5], 'a': [3.0, 2.0, 3.0, 4.0], 'b': [1.0, 4.0, 5.0, 6.0]})
        column_order = ['timestamp', 'a', 'b']

        self.assertEqual(output_main[column_order].values.tolist(), expected_output[column_order].values.tolist())

        # Use the same schema constant as the input check above instead of a hard-coded URL.
        self.assertEqual(utils.to_json_structure(output_main.metadata.to_internal_simple_structure()), [{
            'selector': [],
            'metadata': {
                'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                'structural_type': 'd3m.container.pandas.DataFrame',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                    'length': 4,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__'],
            'metadata': {
                'dimension': {
                    'name': 'columns',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    'length': 3,
                },
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 0],
            'metadata': {
                'name': 'timestamp',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 1],
            'metadata': {
                'name': 'a',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }, {
            'selector': ['__ALL_ELEMENTS__', 2],
            'metadata': {
                'name': 'b',
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                'structural_type': 'numpy.float64',
            },
        }])

        # Round-trip the fitted parameters; this only checks that neither call raises.
        params = primitive.get_params()
        primitive.set_params(params=params)


# Run this module's tests when the file is executed directly.
if __name__ == '__main__':
    unittest.main()


Loading…
Cancel
Save