diff --git a/src/axolotl b/src/axolotl new file mode 160000 index 0000000..af54e69 --- /dev/null +++ b/src/axolotl @@ -0,0 +1 @@ +Subproject commit af54e6970476a081bf0cd65990c9f56a1200d8a2 diff --git a/src/common-primitives b/src/common-primitives new file mode 160000 index 0000000..046b20d --- /dev/null +++ b/src/common-primitives @@ -0,0 +1 @@ +Subproject commit 046b20d2f6d4543dcbe18f0a1d4bcbb1f61cf518 diff --git a/src/d3m b/src/d3m new file mode 160000 index 0000000..70aeefe --- /dev/null +++ b/src/d3m @@ -0,0 +1 @@ +Subproject commit 70aeefed6b7307941581357c4b7858bb3f88e1da diff --git a/tods/data_processing/ContinuityValidation.py b/tods/data_processing/ContinuityValidation.py index 7786552..9a86560 100644 --- a/tods/data_processing/ContinuityValidation.py +++ b/tods/data_processing/ContinuityValidation.py @@ -170,9 +170,3 @@ class ContinuityValidation(transformer.TransformerPrimitiveBase[Inputs, Outputs, inputs['d3mIndex'] = list(range(inputs.shape[0])) return inputs - - def _write(self, inputs:Inputs): - """ - write inputs to current directory, only for test - """ - inputs.to_csv(str(time.time())+'.csv') diff --git a/tods/data_processing/DuplicationValidation.py b/tods/data_processing/DuplicationValidation.py index b9cdb36..2d3a85e 100644 --- a/tods/data_processing/DuplicationValidation.py +++ b/tods/data_processing/DuplicationValidation.py @@ -91,8 +91,3 @@ class DuplicationValidation(transformer.TransformerPrimitiveBase[Inputs, Outputs return inputs - def _write(self, inputs:Inputs): - """ - write inputs to current directory, only for test - """ - inputs.to_csv(str(time.time())+'.csv') diff --git a/tods/detection_algorithm/core/LSTMOD.py b/tods/detection_algorithm/core/LSTMOD.py index 636b7c1..28ced1d 100755 --- a/tods/detection_algorithm/core/LSTMOD.py +++ b/tods/detection_algorithm/core/LSTMOD.py @@ -169,7 +169,7 @@ class LSTMOutlierDetector(CollectiveBaseDetector): # print(danger_coefficient, averaged_relative_error) - else: + else: # pragma: no cover danger_coefficient = np.zeros(relative_error.shape) averaged_relative_error = np.zeros(relative_error.shape) @@ -210,7 +210,7 @@ class LSTMOutlierDetector(CollectiveBaseDetector): -if __name__ == "__main__": +if __name__ == "__main__": # pragma: no cover X_train = np.asarray( [3., 4., 8., 16, 18, 13., 22., 36., 59., 128, 62, 67, 78, 100]).reshape(-1, 1) diff --git a/tods/feature_analysis/BKFilter.py b/tods/feature_analysis/BKFilter.py index c35d12c..f7633de 100644 --- a/tods/feature_analysis/BKFilter.py +++ b/tods/feature_analysis/BKFilter.py @@ -186,14 +186,14 @@ class BKFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams if len(self._training_indices) > 0: # self._clf.fit(self._training_inputs) self._fitted = True - else: + else: # pragma: no cover if self.hyperparams['error_on_no_input']: raise RuntimeError("No input columns were selected") self.logger.warn("No input columns were selected") - if not self._fitted: + if not self._fitted: # pragma: no cover raise PrimitiveNotFittedError("Primitive not fitted.") sk_inputs = inputs if self.hyperparams['use_semantic_types']: @@ -209,7 +209,7 @@ class BKFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams outputs.columns = self._input_column_names output_columns = [outputs] - else: + else: # pragma: no cover if self.hyperparams['error_on_no_input']: raise RuntimeError("No input columns were selected") self.logger.warn("No input columns were selected") @@ -217,14 +217,11 @@ class BKFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams add_index_columns=self.hyperparams['add_index_columns'], inputs=inputs, column_indices=self._training_indices, columns_list=output_columns) - - # self._write(outputs) - # self.logger.warning('produce was called3') return CallResult(outputs) @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): + def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover """ Select columns to fit. Args: @@ -261,7 +258,7 @@ class BKFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams # return columns_to_produce @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: + def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover """ Output whether a column can be processed. Args: @@ -354,8 +351,6 @@ class BKFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams return target_columns_metadata - def _write(self, inputs:Inputs): - inputs.to_csv(str(time.time())+'.csv') def _bkfilter(self, X, low, high, K): """ diff --git a/tods/feature_analysis/HPFilter.py b/tods/feature_analysis/HPFilter.py index ba26d5e..f2a5c5e 100644 --- a/tods/feature_analysis/HPFilter.py +++ b/tods/feature_analysis/HPFilter.py @@ -163,14 +163,14 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams if len(self._training_indices) > 0: # self._clf.fit(self._training_inputs) self._fitted = True - else: + else: # pragma: no cover if self.hyperparams['error_on_no_input']: raise RuntimeError("No input columns were selected") self.logger.warn("No input columns were selected") - if not self._fitted: + if not self._fitted: # pragma: no cover raise PrimitiveNotFittedError("Primitive not fitted.") sk_inputs = inputs if self.hyperparams['use_semantic_types']: @@ -186,7 +186,7 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams outputs.columns = self._input_column_names output_columns = [outputs] - else: + else: # pragma: no cover if self.hyperparams['error_on_no_input']: raise RuntimeError("No input columns were selected") self.logger.warn("No input columns were selected") @@ -194,14 +194,11 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams add_index_columns=self.hyperparams['add_index_columns'], inputs=inputs, column_indices=self._training_indices, columns_list=output_columns) - - # self._write(outputs) - # self.logger.warning('produce was called3') return CallResult(outputs) @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): + def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover """ Select columns to fit. Args: @@ -238,7 +235,7 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams # return columns_to_produce @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: + def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover """ Output whether a column can be processed. Args: @@ -331,9 +328,6 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams return target_columns_metadata - def _write(self, inputs:Inputs): - inputs.to_csv(str(time.time())+'.csv') - def _hpfilter(self, X, lamb): """ Perform HPFilter diff --git a/tods/feature_analysis/SKTruncatedSVD.py b/tods/feature_analysis/SKTruncatedSVD.py index 2b6f038..01c6950 100644 --- a/tods/feature_analysis/SKTruncatedSVD.py +++ b/tods/feature_analysis/SKTruncatedSVD.py @@ -224,7 +224,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H Returns: None """ - if self._fitted: + if self._fitted: # pragma: no cover return CallResult(None) # Get cols to fit. @@ -239,7 +239,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H if len(self._training_indices) > 0: self._clf.fit(self._training_inputs) self._fitted = True - else: + else: # pragma: no cover if self.hyperparams['error_on_no_input']: raise RuntimeError("No input columns were selected") self.logger.warn("No input columns were selected") @@ -257,7 +257,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H # self.logger.warning(str(self.metadata.query()['name'])) - if not self._fitted: + if not self._fitted: # pragma: no cover raise PrimitiveNotFittedError("Primitive not fitted.") sk_inputs = inputs if self.hyperparams['use_semantic_types']: @@ -272,7 +272,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H if len(outputs.columns) == len(self._input_column_names): outputs.columns = self._input_column_names output_columns = [outputs] - else: + else: # pragma: no cover if self.hyperparams['error_on_no_input']: raise RuntimeError("No input columns were selected") self.logger.warn("No input columns were selected") @@ -286,7 +286,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H return CallResult(outputs) - def get_params(self) -> Params: + def get_params(self) -> Params: # pragma: no cover """ Return parameters. Args: @@ -320,7 +320,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H target_columns_metadata_=self._target_columns_metadata ) - def set_params(self, *, params: Params) -> None: + def set_params(self, *, params: Params) -> None: # pragma: no cover """ Set parameters for SKTruncatedSVD. Args: @@ -351,7 +351,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): + def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover """ Select columns to fit. Args: @@ -377,7 +377,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H # return columns_to_produce @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: + def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover """ Output whether a column can be processed. Args: @@ -408,35 +408,35 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H return False - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - """ - Output metadata of selected columns. - Args: - outputs_metadata: metadata_base.DataMetadata - hyperparams: d3m.metadata.hyperparams.Hyperparams - - Returns: - d3m.metadata.base.DataMetadata - """ - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata + # @classmethod + # def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: + # """ + # Output metadata of selected columns. + # Args: + # outputs_metadata: metadata_base.DataMetadata + # hyperparams: d3m.metadata.hyperparams.Hyperparams + + # Returns: + # d3m.metadata.base.DataMetadata + # """ + # outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] + + # target_columns_metadata: List[OrderedDict] = [] + # for column_index in range(outputs_length): + # column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) + + # # Update semantic types and prepare it for predicted targets. + # semantic_types = set(column_metadata.get('semantic_types', [])) + # semantic_types_to_remove = set([]) + # add_semantic_types = [] + # add_semantic_types.add(hyperparams["return_semantic_type"]) + # semantic_types = semantic_types - semantic_types_to_remove + # semantic_types = semantic_types.union(add_semantic_types) + # column_metadata['semantic_types'] = list(semantic_types) + + # target_columns_metadata.append(column_metadata) + + # return target_columns_metadata @classmethod def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], @@ -500,11 +500,3 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H return target_columns_metadata - def _write(self, inputs:Inputs): - """ - write inputs to current directory, only for test - """ - inputs.to_csv(str(time.time())+'.csv') - - -# SKTruncatedSVD.__doc__ = TruncatedSVD.__doc__ diff --git a/tods/feature_analysis/TRMF.py b/tods/feature_analysis/TRMF.py index 3b21fa1..fcbdab9 100644 --- a/tods/feature_analysis/TRMF.py +++ b/tods/feature_analysis/TRMF.py @@ -276,14 +276,14 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): if len(self._training_indices) > 0: self._clf.fit(self._training_inputs) self._fitted = True - else: + else: # pragma: no cover if self.hyperparams['error_on_no_input']: raise RuntimeError("No input columns were selected") self.logger.warn("No input columns were selected") - if not self._fitted: + if not self._fitted: # pragma: no cover raise PrimitiveNotFittedError("Primitive not fitted.") sk_inputs = inputs @@ -301,7 +301,7 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): if len(outputs.columns) == len(self._input_column_names): outputs.columns = self._input_column_names output_columns = [outputs] - else: + else: # pragma: no cover if self.hyperparams['error_on_no_input']: raise RuntimeError("No input columns were selected") self.logger.warn("No input columns were selected") @@ -316,7 +316,7 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): + def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover """ Select columns to fit. Args: @@ -342,7 +342,7 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): # return columns_to_produce @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: + def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover """ Output whether a column can be processed. Args: @@ -373,35 +373,35 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): return False - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - """ - Output metadata of selected columns. - Args: - outputs_metadata: metadata_base.DataMetadata - hyperparams: d3m.metadata.hyperparams.Hyperparams - - Returns: - d3m.metadata.base.DataMetadata - """ - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata + # @classmethod + # def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: + # """ + # Output metadata of selected columns. + # Args: + # outputs_metadata: metadata_base.DataMetadata + # hyperparams: d3m.metadata.hyperparams.Hyperparams + + # Returns: + # d3m.metadata.base.DataMetadata + # """ + # outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] + + # target_columns_metadata: List[OrderedDict] = [] + # for column_index in range(outputs_length): + # column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) + + # # Update semantic types and prepare it for predicted targets. + # semantic_types = set(column_metadata.get('semantic_types', [])) + # semantic_types_to_remove = set([]) + # add_semantic_types = [] + # add_semantic_types.add(hyperparams["return_semantic_type"]) + # semantic_types = semantic_types - semantic_types_to_remove + # semantic_types = semantic_types.union(add_semantic_types) + # column_metadata['semantic_types'] = list(semantic_types) + + # target_columns_metadata.append(column_metadata) + + # return target_columns_metadata @classmethod def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], @@ -465,12 +465,6 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): return target_columns_metadata - def _write(self, inputs:Inputs): - """ - write inputs to current directory, only for test - """ - inputs.to_csv(str(time.time())+'.csv') - """ Temporal Regularized Matrix Factorization @@ -564,7 +558,7 @@ class trmf: return np.dot(self.F, X_preds) - def _predict_X(self, h): + def _predict_X(self, h): # pragma: no cover """Predict X h timepoints ahead. Evaluates matrix X with the help of matrix W. diff --git a/tods/tests/test_ContinuityValidation.py b/tods/tests/test_ContinuityValidation.py index 185bd4c..9c69f8e 100644 --- a/tods/tests/test_ContinuityValidation.py +++ b/tods/tests/test_ContinuityValidation.py @@ -59,7 +59,7 @@ class ContinuityValidationTest(unittest.TestCase): hyperparams_class = ContinuityValidation.ContinuityValidation.metadata.get_hyperparams() primitive = ContinuityValidation.ContinuityValidation(hyperparams=hyperparams_class.defaults()) new_main = primitive.produce(inputs=main).value - # print(new_main) + expected_output = container.DataFrame({'d3mIndex': [0, 1, 2, 3], 'timestamp': [1., 2., 3., 4.], @@ -124,6 +124,67 @@ class ContinuityValidationTest(unittest.TestCase): self._test_continuity(new_main) + hyperparams = hyperparams_class.defaults() + hyperparams = hyperparams.replace({'continuity_option': 'ablation'}) + primitive2 = ContinuityValidation.ContinuityValidation(hyperparams=hyperparams) + new_main2 = primitive2.produce(inputs=main).value + print(new_main2) + + self.assertEqual(utils.to_json_structure(new_main2.metadata.to_internal_simple_structure()), [{ + 'selector': [], + 'metadata': { + # 'top_level': 'main', + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': 'd3m.container.pandas.DataFrame', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': 2, + }, + }, + }, { + 'selector': ['__ALL_ELEMENTS__'], + 'metadata': { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': 5, + }, + }, + }, { + 'selector': ['__ALL_ELEMENTS__', 0], + 'metadata': { + 'name': 'd3mIndex', + 'structural_type': 'numpy.int64', + }, + }, { + 'selector': ['__ALL_ELEMENTS__', 1], + 'metadata': { + 'name': 'timestamp', + 'structural_type': 'numpy.float64', + }, + }, { + 'selector': ['__ALL_ELEMENTS__', 2], + 'metadata': { + 'name': 'a', + 'structural_type': 'numpy.float64', + }, + }, { + 'selector': ['__ALL_ELEMENTS__', 3], + 'metadata': { + 'name': 'b', + 'structural_type': 'numpy.float64', + }, + }, { + 'selector': ['__ALL_ELEMENTS__', 4], + 'metadata': { + 'name': 'ground_truth', + 'structural_type': 'numpy.int64', + }, + }]) + + def _test_continuity(self, data_value): tmp_col = data_value['timestamp'] diff --git a/tods/tests/test_DuplicationValidation.py b/tods/tests/test_DuplicationValidation.py index 6c85cfb..6e34d1f 100644 --- a/tods/tests/test_DuplicationValidation.py +++ b/tods/tests/test_DuplicationValidation.py @@ -102,6 +102,54 @@ class DuplicationValidationTest(unittest.TestCase): self._test_drop_duplication(new_main) + hyperparams = hyperparams_class.defaults() + hyperparams = hyperparams.replace({'keep_option': 'average'}) + primitive2 = DuplicationValidation.DuplicationValidation(hyperparams=hyperparams) + new_main2 = primitive2.produce(inputs=main).value + print(new_main2) + + self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{ + 'selector': [], + 'metadata': { + # 'top_level': 'main', + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': 'd3m.container.pandas.DataFrame', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': 2, + }, + }, + }, { + 'selector': ['__ALL_ELEMENTS__'], + 'metadata': { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': 3, + }, + }, + }, { + 'selector': ['__ALL_ELEMENTS__', 0], + 'metadata': { + 'name': 'timestamp', + 'structural_type': 'numpy.float64', + }, + }, { + 'selector': ['__ALL_ELEMENTS__', 1], + 'metadata': { + 'name': 'a', + 'structural_type': 'numpy.float64', + }, + }, { + 'selector': ['__ALL_ELEMENTS__', 2], + 'metadata': { + 'name': 'b', + 'structural_type': 'numpy.float64', + }, + }]) + def _test_drop_duplication(self, data_value): self.assertEqual(True in list(data_value.duplicated('timestamp')), False)