fixed merge conflict on BKFilter
@@ -0,0 +1 @@ | |||
Subproject commit af54e6970476a081bf0cd65990c9f56a1200d8a2 |
@@ -0,0 +1 @@ | |||
Subproject commit 046b20d2f6d4543dcbe18f0a1d4bcbb1f61cf518 |
@@ -0,0 +1 @@ | |||
Subproject commit 70aeefed6b7307941581357c4b7858bb3f88e1da |
@@ -170,9 +170,3 @@ class ContinuityValidation(transformer.TransformerPrimitiveBase[Inputs, Outputs, | |||
inputs['d3mIndex'] = list(range(inputs.shape[0])) | |||
return inputs | |||
def _write(self, inputs:Inputs): | |||
""" | |||
write inputs to current directory, only for test | |||
""" | |||
inputs.to_csv(str(time.time())+'.csv') |
@@ -91,8 +91,3 @@ class DuplicationValidation(transformer.TransformerPrimitiveBase[Inputs, Outputs | |||
return inputs | |||
def _write(self, inputs:Inputs): | |||
""" | |||
write inputs to current directory, only for test | |||
""" | |||
inputs.to_csv(str(time.time())+'.csv') |
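The removed _write helpers above were debug-only: they dumped the input frame to a timestamped CSV in the working directory. A standalone equivalent, should that behaviour ever be needed outside the primitives (the function name is illustrative, not from the repository):

import time
import pandas as pd

def write_debug_csv(df: pd.DataFrame) -> None:
    # Same behaviour as the removed _write methods: write the frame to a
    # CSV named after the current Unix timestamp, for ad-hoc inspection only.
    df.to_csv(str(time.time()) + '.csv')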
@@ -169,7 +169,7 @@ class LSTMOutlierDetector(CollectiveBaseDetector): | |||
# print(danger_coefficient, averaged_relative_error) | |||
else: | |||
else: # pragma: no cover | |||
danger_coefficient = np.zeros(relative_error.shape) | |||
averaged_relative_error = np.zeros(relative_error.shape) | |||
@@ -210,7 +210,7 @@ class LSTMOutlierDetector(CollectiveBaseDetector): | |||
if __name__ == "__main__": | |||
if __name__ == "__main__": # pragma: no cover | |||
X_train = np.asarray( | |||
[3., 4., 8., 16, 18, 13., 22., 36., 59., 128, 62, 67, 78, 100]).reshape(-1, 1) | |||
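The # pragma: no cover comments added in this hunk tell coverage.py to exclude those branches and the __main__ block from coverage reports. A minimal, self-contained sketch of the same pattern (the helper below is illustrative, not TODS code):

def parse_value(text):  # illustrative helper, not part of the repository
    try:
        return float(text)
    except ValueError:  # pragma: no cover
        # Excluded from the coverage report; this branch is only hit on
        # malformed input that the test suite does not exercise.
        return 0.0

if __name__ == "__main__":  # pragma: no cover
    print(parse_value("3.5"))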
@@ -0,0 +1,376 @@ | |||
from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple | |||
from numpy import ndarray | |||
from collections import OrderedDict | |||
from scipy import sparse | |||
import os | |||
import sklearn | |||
import numpy | |||
import typing | |||
import time | |||
from d3m import container | |||
from d3m.primitive_interfaces import base, transformer | |||
from d3m.metadata import base as metadata_base, hyperparams | |||
from d3m.container.numpy import ndarray as d3m_ndarray | |||
from d3m.container import DataFrame as d3m_dataframe | |||
from d3m.metadata import hyperparams, params, base as metadata_base | |||
from d3m import utils | |||
from d3m.base import utils as base_utils | |||
from d3m.exceptions import PrimitiveNotFittedError | |||
from d3m.primitive_interfaces.base import CallResult, DockerContainer | |||
import os.path | |||
import time | |||
import statsmodels.api as sm | |||
__all__ = ('BKFilter',) | |||
Inputs = container.DataFrame | |||
Outputs = container.DataFrame | |||
class Hyperparams(hyperparams.Hyperparams): | |||
# Tuning | |||
low = hyperparams.UniformInt( | |||
lower=0, | |||
upper=100000000, | |||
default=6, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], | |||
description="Minimum period for oscillations, ie., Baxter and King suggest that the Burns-Mitchell U.S. business cycle has 6 for quarterly data and 1.5 for annual data.", | |||
) | |||
high = hyperparams.UniformInt( | |||
lower=0, | |||
upper=100000000, | |||
default=32, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], | |||
description="Maximum period for oscillations BK suggest that the U.S. business cycle has 32 for quarterly data and 8 for annual data.", | |||
) | |||
K = hyperparams.UniformInt( | |||
lower=0, | |||
upper=100000000, | |||
default=1, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], | |||
description="Lead-lag length of the filter. Baxter and King propose a truncation length of 12 for quarterly data and 3 for annual data.", | |||
) | |||
# Control | |||
columns_using_method= hyperparams.Enumeration( | |||
values=['name', 'index'], | |||
default='index', | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Choose to use columns by names or indecies. If 'name', \"use_columns\" or \"exclude_columns\" is used. If 'index', \"use_columns_name\" or \"exclude_columns_name\" is used." | |||
) | |||
use_columns_name = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[str](''), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column names to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", | |||
) | |||
exclude_columns_name = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[str](''), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column names to not operate on. Applicable only if \"use_columns_name\" is not provided.", | |||
) | |||
use_columns = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[int](-1), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", | |||
) | |||
exclude_columns = hyperparams.Set( | |||
elements=hyperparams.Hyperparameter[int](-1), | |||
default=(), | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", | |||
) | |||
return_result = hyperparams.Enumeration( | |||
values=['append', 'replace', 'new'], | |||
default='append', | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", | |||
) | |||
use_semantic_types = hyperparams.UniformBool( | |||
default=False, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" | |||
) | |||
add_index_columns = hyperparams.UniformBool( | |||
default=False, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", | |||
) | |||
error_on_no_input = hyperparams.UniformBool( | |||
default=True, | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||
description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", | |||
) | |||
return_semantic_type = hyperparams.Enumeration[str]( | |||
values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], | |||
default='https://metadata.datadrivendiscovery.org/types/Attribute', | |||
description='Decides what semantic type to attach to generated attributes', | |||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] | |||
) | |||
class BKFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
""" | |||
Filter a time series using the Baxter-King bandpass filter. | |||
Parameters | |||
---------- | |||
low: int | |||
Minimum period for oscillations. Baxter and King suggest 6 for quarterly data and 1.5 for annual data (the Burns-Mitchell definition of the U.S. business cycle). | |||
high: int | |||
Maximum period for oscillations. Baxter and King suggest 32 for quarterly data and 8 for annual data. | |||
K: int | |||
Lead-lag length of the filter. Baxter and King propose a truncation length of 12 for quarterly data and 3 for annual data. | |||
use_columns: Set | |||
A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped. | |||
exclude_columns: Set | |||
A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided. | |||
return_result: Enumeration | |||
Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false. | |||
use_semantic_types: Bool | |||
Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe. | |||
add_index_columns: Bool | |||
Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\". | |||
error_on_no_input: Bool | |||
Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking, set this to False. | |||
return_semantic_type: Enumeration[str] | |||
Decides what semantic type to attach to generated attributes. | |||
""" | |||
__author__: "DATA Lab at Texas A&M University" | |||
metadata = metadata_base.PrimitiveMetadata({ | |||
"name": "Baxter-King Filter Primitive", | |||
"python_path": "d3m.primitives.tods.feature_analysis.bk_filter", | |||
"source": {'name': 'DATA Lab at Texas A&M University', 'contact': 'mailto:khlai037@tamu.edu', | |||
'uris': ['https://gitlab.com/lhenry15/tods.git', 'https://gitlab.com/lhenry15/tods/-/blob/Junjie/anomaly-primitives/anomaly_primitives/DuplicationValidation.py']}, | |||
"algorithm_types": [metadata_base.PrimitiveAlgorithmType.BK_FILTER,], | |||
"primitive_family": metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION, | |||
"id": "b2bfadc5-dbca-482c-b188-8585e5f245c4", | |||
"hyperparams_to_tune": ['low', 'high', 'K'], | |||
"version": "0.0.1", | |||
}) | |||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||
""" | |||
Apply the Baxter-King filter to the input data. | |||
Args: | |||
inputs: Container DataFrame. | |||
Returns: | |||
Container DataFrame after BKFilter. | |||
""" | |||
# Get cols to fit. | |||
self._fitted = False | |||
self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams) | |||
self._input_column_names = self._training_inputs.columns | |||
if len(self._training_indices) > 0: | |||
# self._clf.fit(self._training_inputs) | |||
self._fitted = True | |||
else: | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
if not self._fitted: | |||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||
sk_inputs = inputs | |||
if self.hyperparams['use_semantic_types']: | |||
sk_inputs = inputs.iloc[:, self._training_indices] | |||
output_columns = [] | |||
if len(self._training_indices) > 0: | |||
sk_output = self._bkfilter(sk_inputs, low=self.hyperparams['low'], high=self.hyperparams['high'], K=self.hyperparams['K']) | |||
if sparse.issparse(sk_output): | |||
sk_output = sk_output.toarray() | |||
outputs = self._wrap_predictions(inputs, sk_output) | |||
if len(outputs.columns) == len(self._input_column_names): | |||
outputs.columns = self._input_column_names | |||
output_columns = [outputs] | |||
else: | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], | |||
add_index_columns=self.hyperparams['add_index_columns'], | |||
inputs=inputs, column_indices=self._training_indices, | |||
columns_list=output_columns) | |||
# self._write(outputs) | |||
# self.logger.warning('produce was called3') | |||
return CallResult(outputs) | |||
@classmethod | |||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): | |||
""" | |||
Select columns to fit. | |||
Args: | |||
inputs: Container DataFrame | |||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||
Returns: | |||
list | |||
""" | |||
if not hyperparams['use_semantic_types']: | |||
return inputs, list(range(len(inputs.columns))) | |||
inputs_metadata = inputs.metadata | |||
def can_produce_column(column_index: int) -> bool: | |||
return cls._can_produce_column(inputs_metadata, column_index, hyperparams) | |||
use_columns = [] | |||
exclude_columns = [] | |||
# if hyperparams['columns_using_method'] == 'name': | |||
# inputs_cols = inputs.columns.values.tolist() | |||
# for i in range(len(inputs_cols)): | |||
# if inputs_cols[i] in hyperparams['use_columns_name']: | |||
# use_columns.append(i) | |||
# elif inputs_cols[i] in hyperparams['exclude_columns_name']: | |||
# exclude_columns.append(i) | |||
# else: | |||
use_columns=hyperparams['use_columns'] | |||
exclude_columns=hyperparams['exclude_columns'] | |||
columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, use_columns=use_columns, exclude_columns=exclude_columns, can_use_column=can_produce_column) | |||
return inputs.iloc[:, columns_to_produce], columns_to_produce | |||
# return columns_to_produce | |||
@classmethod | |||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: | |||
""" | |||
Output whether a column can be processed. | |||
Args: | |||
inputs_metadata: d3m.metadata.base.DataMetadata | |||
column_index: int | |||
Returns: | |||
bool | |||
""" | |||
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||
accepted_structural_types = (int, float, numpy.integer, numpy.float64) | |||
accepted_semantic_types = set() | |||
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") | |||
if not issubclass(column_metadata['structural_type'], accepted_structural_types): | |||
return False | |||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||
if len(semantic_types) == 0: | |||
cls.logger.warning("No semantic types found in column metadata") | |||
return False | |||
# Making sure all accepted_semantic_types are available in semantic_types | |||
if len(accepted_semantic_types - semantic_types) == 0: | |||
return True | |||
return False | |||
@classmethod | |||
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | |||
target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: | |||
""" | |||
Update metadata for selected columns. | |||
Args: | |||
inputs_metadata: metadata_base.DataMetadata | |||
outputs: Container Dataframe | |||
target_columns_metadata: list | |||
Returns: | |||
d3m.metadata.base.DataMetadata | |||
""" | |||
outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) | |||
for column_index, column_metadata in enumerate(target_columns_metadata): | |||
column_metadata.pop("structural_type", None) | |||
outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) | |||
return outputs_metadata | |||
def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: | |||
""" | |||
Wrap predictions into dataframe | |||
Args: | |||
inputs: Container Dataframe | |||
predictions: array-like data (n_samples, n_features) | |||
Returns: | |||
Dataframe | |||
""" | |||
outputs = d3m_dataframe(predictions, generate_metadata=True) | |||
target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams) | |||
outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) | |||
return outputs | |||
@classmethod | |||
def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams): | |||
""" | |||
Add target columns metadata | |||
Args: | |||
outputs_metadata: metadata.base.DataMetadata | |||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||
Returns: | |||
List[OrderedDict] | |||
""" | |||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
target_columns_metadata: List[OrderedDict] = [] | |||
for column_index in range(outputs_length): | |||
column_name = "output_{}".format(column_index) | |||
column_metadata = OrderedDict() | |||
semantic_types = set() | |||
semantic_types.add(hyperparams["return_semantic_type"]) | |||
column_metadata['semantic_types'] = list(semantic_types) | |||
column_metadata["name"] = str(column_name) | |||
target_columns_metadata.append(column_metadata) | |||
return target_columns_metadata | |||
def _write(self, inputs:Inputs): | |||
inputs.to_csv(str(time.time())+'.csv') | |||
def _bkfilter(self, X, low, high, K): | |||
""" | |||
Perform BKFilter | |||
Args: | |||
X: selected columns to be filtered | |||
low, high, K: parameters of the Baxter-King filter | |||
Returns: | |||
Dataframe, results of BKFilter | |||
""" | |||
transformed_X = utils.pandas.DataFrame() | |||
for col in X.columns: | |||
cycle = sm.tsa.filters.bkfilter(X[col], low=low, high=high, K=K) | |||
cycle_df = utils.pandas.DataFrame(cycle) | |||
transformed_X = utils.pandas.concat([transformed_X,cycle_df], axis=1) | |||
return transformed_X |
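The _bkfilter helper above simply applies sm.tsa.filters.bkfilter to each selected column and concatenates the results. A minimal standalone sketch of that statsmodels call on made-up quarterly data (the series itself is illustrative):

import pandas as pd
import statsmodels.api as sm

# Illustrative quarterly series; in the primitive this would be one column
# of the selected input DataFrame.
index = pd.period_range("2000Q1", periods=40, freq="Q")
series = pd.Series([float(v) for v in range(40)], index=index)

# Baxter-King bandpass filter: keep oscillations with periods between
# low and high, using a lead-lag truncation of K observations.
cycle = sm.tsa.filters.bkfilter(series, low=6, high=32, K=12)

# The filter loses K observations at each end, so the output has
# 40 - 2 * K = 16 rows.
print(cycle.shape)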
@@ -163,14 +163,14 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams | |||
if len(self._training_indices) > 0: | |||
# self._clf.fit(self._training_inputs) | |||
self._fitted = True | |||
else: | |||
else: # pragma: no cover | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
if not self._fitted: | |||
if not self._fitted: # pragma: no cover | |||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||
sk_inputs = inputs | |||
if self.hyperparams['use_semantic_types']: | |||
@@ -186,7 +186,7 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams | |||
outputs.columns = self._input_column_names | |||
output_columns = [outputs] | |||
else: | |||
else: # pragma: no cover | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
@@ -194,14 +194,11 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams | |||
add_index_columns=self.hyperparams['add_index_columns'], | |||
inputs=inputs, column_indices=self._training_indices, | |||
columns_list=output_columns) | |||
# self._write(outputs) | |||
# self.logger.warning('produce was called3') | |||
return CallResult(outputs) | |||
@classmethod | |||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): | |||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover | |||
""" | |||
Select columns to fit. | |||
Args: | |||
@@ -238,7 +235,7 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams | |||
# return columns_to_produce | |||
@classmethod | |||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: | |||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover | |||
""" | |||
Output whether a column can be processed. | |||
Args: | |||
@@ -331,9 +328,6 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams | |||
return target_columns_metadata | |||
def _write(self, inputs:Inputs): | |||
inputs.to_csv(str(time.time())+'.csv') | |||
def _hpfilter(self, X, lamb): | |||
""" | |||
Perform HPFilter | |||
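The HPFilter primitive's _hpfilter takes a smoothing parameter lamb; statsmodels exposes the corresponding Hodrick-Prescott routine as a single call. A minimal sketch on an illustrative series (values are made up; this shows the library call, not necessarily the exact body of _hpfilter):

import pandas as pd
import statsmodels.api as sm

values = pd.Series([float(v) for v in range(1, 41)])
# hpfilter splits the series into cyclical and trend components;
# lamb=1600 is the conventional smoothing parameter for quarterly data.
cycle, trend = sm.tsa.filters.hpfilter(values, lamb=1600)
print(cycle.shape, trend.shape)  # both (40,)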
@@ -224,7 +224,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||
Returns: | |||
None | |||
""" | |||
if self._fitted: | |||
if self._fitted: # pragma: no cover | |||
return CallResult(None) | |||
# Get cols to fit. | |||
@@ -239,7 +239,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||
if len(self._training_indices) > 0: | |||
self._clf.fit(self._training_inputs) | |||
self._fitted = True | |||
else: | |||
else: # pragma: no cover | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
@@ -257,7 +257,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||
# self.logger.warning(str(self.metadata.query()['name'])) | |||
if not self._fitted: | |||
if not self._fitted: # pragma: no cover | |||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||
sk_inputs = inputs | |||
if self.hyperparams['use_semantic_types']: | |||
@@ -272,7 +272,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||
if len(outputs.columns) == len(self._input_column_names): | |||
outputs.columns = self._input_column_names | |||
output_columns = [outputs] | |||
else: | |||
else: # pragma: no cover | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
@@ -286,7 +286,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||
return CallResult(outputs) | |||
def get_params(self) -> Params: | |||
def get_params(self) -> Params: # pragma: no cover | |||
""" | |||
Return parameters. | |||
Args: | |||
@@ -320,7 +320,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||
target_columns_metadata_=self._target_columns_metadata | |||
) | |||
def set_params(self, *, params: Params) -> None: | |||
def set_params(self, *, params: Params) -> None: # pragma: no cover | |||
""" | |||
Set parameters for SKTruncatedSVD. | |||
Args: | |||
@@ -351,7 +351,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||
@classmethod | |||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): | |||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover | |||
""" | |||
Select columns to fit. | |||
Args: | |||
@@ -377,7 +377,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||
# return columns_to_produce | |||
@classmethod | |||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: | |||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover | |||
""" | |||
Output whether a column can be processed. | |||
Args: | |||
@@ -408,35 +408,35 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||
return False | |||
@classmethod | |||
def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: | |||
""" | |||
Output metadata of selected columns. | |||
Args: | |||
outputs_metadata: metadata_base.DataMetadata | |||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||
Returns: | |||
d3m.metadata.base.DataMetadata | |||
""" | |||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
target_columns_metadata: List[OrderedDict] = [] | |||
for column_index in range(outputs_length): | |||
column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) | |||
# Update semantic types and prepare it for predicted targets. | |||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||
semantic_types_to_remove = set([]) | |||
add_semantic_types = [] | |||
add_semantic_types.add(hyperparams["return_semantic_type"]) | |||
semantic_types = semantic_types - semantic_types_to_remove | |||
semantic_types = semantic_types.union(add_semantic_types) | |||
column_metadata['semantic_types'] = list(semantic_types) | |||
target_columns_metadata.append(column_metadata) | |||
return target_columns_metadata | |||
# @classmethod | |||
# def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: | |||
# """ | |||
# Output metadata of selected columns. | |||
# Args: | |||
# outputs_metadata: metadata_base.DataMetadata | |||
# hyperparams: d3m.metadata.hyperparams.Hyperparams | |||
# Returns: | |||
# d3m.metadata.base.DataMetadata | |||
# """ | |||
# outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
# target_columns_metadata: List[OrderedDict] = [] | |||
# for column_index in range(outputs_length): | |||
# column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) | |||
# # Update semantic types and prepare it for predicted targets. | |||
# semantic_types = set(column_metadata.get('semantic_types', [])) | |||
# semantic_types_to_remove = set([]) | |||
# add_semantic_types = [] | |||
# add_semantic_types.add(hyperparams["return_semantic_type"]) | |||
# semantic_types = semantic_types - semantic_types_to_remove | |||
# semantic_types = semantic_types.union(add_semantic_types) | |||
# column_metadata['semantic_types'] = list(semantic_types) | |||
# target_columns_metadata.append(column_metadata) | |||
# return target_columns_metadata | |||
@classmethod | |||
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | |||
@@ -500,11 +500,3 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||
return target_columns_metadata | |||
def _write(self, inputs:Inputs): | |||
""" | |||
write inputs to current directory, only for test | |||
""" | |||
inputs.to_csv(str(time.time())+'.csv') | |||
# SKTruncatedSVD.__doc__ = TruncatedSVD.__doc__ |
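SKTruncatedSVD delegates to scikit-learn's TruncatedSVD (the self._clf seen in the hunks above). A minimal sketch of that underlying estimator on synthetic data (shapes, seed, and component count are illustrative):

import numpy as np
from sklearn.decomposition import TruncatedSVD

X = np.random.RandomState(0).rand(100, 10)
# Project 10 input features onto 2 latent components, as the primitive
# does for the columns it selects.
svd = TruncatedSVD(n_components=2, random_state=0)
X_reduced = svd.fit_transform(X)
print(X_reduced.shape)  # (100, 2)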
@@ -276,14 +276,14 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
if len(self._training_indices) > 0: | |||
self._clf.fit(self._training_inputs) | |||
self._fitted = True | |||
else: | |||
else: # pragma: no cover | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
if not self._fitted: | |||
if not self._fitted: # pragma: no cover | |||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||
sk_inputs = inputs | |||
@@ -301,7 +301,7 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
if len(outputs.columns) == len(self._input_column_names): | |||
outputs.columns = self._input_column_names | |||
output_columns = [outputs] | |||
else: | |||
else: # pragma: no cover | |||
if self.hyperparams['error_on_no_input']: | |||
raise RuntimeError("No input columns were selected") | |||
self.logger.warn("No input columns were selected") | |||
@@ -316,7 +316,7 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
@classmethod | |||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): | |||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover | |||
""" | |||
Select columns to fit. | |||
Args: | |||
@@ -342,7 +342,7 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
# return columns_to_produce | |||
@classmethod | |||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: | |||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover | |||
""" | |||
Output whether a column can be processed. | |||
Args: | |||
@@ -373,35 +373,35 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
return False | |||
@classmethod | |||
def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: | |||
""" | |||
Output metadata of selected columns. | |||
Args: | |||
outputs_metadata: metadata_base.DataMetadata | |||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||
Returns: | |||
d3m.metadata.base.DataMetadata | |||
""" | |||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
target_columns_metadata: List[OrderedDict] = [] | |||
for column_index in range(outputs_length): | |||
column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) | |||
# Update semantic types and prepare it for predicted targets. | |||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||
semantic_types_to_remove = set([]) | |||
add_semantic_types = [] | |||
add_semantic_types.add(hyperparams["return_semantic_type"]) | |||
semantic_types = semantic_types - semantic_types_to_remove | |||
semantic_types = semantic_types.union(add_semantic_types) | |||
column_metadata['semantic_types'] = list(semantic_types) | |||
target_columns_metadata.append(column_metadata) | |||
return target_columns_metadata | |||
# @classmethod | |||
# def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: | |||
# """ | |||
# Output metadata of selected columns. | |||
# Args: | |||
# outputs_metadata: metadata_base.DataMetadata | |||
# hyperparams: d3m.metadata.hyperparams.Hyperparams | |||
# Returns: | |||
# d3m.metadata.base.DataMetadata | |||
# """ | |||
# outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||
# target_columns_metadata: List[OrderedDict] = [] | |||
# for column_index in range(outputs_length): | |||
# column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) | |||
# # Update semantic types and prepare it for predicted targets. | |||
# semantic_types = set(column_metadata.get('semantic_types', [])) | |||
# semantic_types_to_remove = set([]) | |||
# add_semantic_types = [] | |||
# add_semantic_types.add(hyperparams["return_semantic_type"]) | |||
# semantic_types = semantic_types - semantic_types_to_remove | |||
# semantic_types = semantic_types.union(add_semantic_types) | |||
# column_metadata['semantic_types'] = list(semantic_types) | |||
# target_columns_metadata.append(column_metadata) | |||
# return target_columns_metadata | |||
@classmethod | |||
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | |||
@@ -465,12 +465,6 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||
return target_columns_metadata | |||
def _write(self, inputs:Inputs): | |||
""" | |||
write inputs to current directory, only for test | |||
""" | |||
inputs.to_csv(str(time.time())+'.csv') | |||
""" | |||
Temporal Regularized Matrix Factorization | |||
@@ -564,7 +558,7 @@ class trmf: | |||
return np.dot(self.F, X_preds) | |||
def _predict_X(self, h): | |||
def _predict_X(self, h): # pragma: no cover | |||
"""Predict X h timepoints ahead. | |||
Evaluates matrix X with the help of matrix W. | |||
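The hunk above only marks _predict_X as excluded from coverage; for context, its docstring says it extrapolates the latent matrix X h timepoints ahead using the lag weights W. A generic sketch of that kind of lagged extrapolation (the lag set, shapes, and update rule here are illustrative, not the repository's exact implementation):

import numpy as np

rng = np.random.RandomState(0)
k, T = 3, 20                 # latent dimension, observed timepoints
lags = [1, 2]                # illustrative lag set
X = rng.rand(k, T)           # latent temporal factors
W = rng.rand(k, len(lags))   # per-factor autoregressive weights

h = 4                        # horizon: extrapolate 4 new timepoints
X_ext = np.hstack([X, np.zeros((k, h))])
for t in range(T, T + h):
    X_ext[:, t] = sum(W[:, i] * X_ext[:, t - lag] for i, lag in enumerate(lags))
print(X_ext[:, T:].shape)    # (3, 4)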
@@ -59,7 +59,7 @@ class ContinuityValidationTest(unittest.TestCase): | |||
hyperparams_class = ContinuityValidation.ContinuityValidation.metadata.get_hyperparams() | |||
primitive = ContinuityValidation.ContinuityValidation(hyperparams=hyperparams_class.defaults()) | |||
new_main = primitive.produce(inputs=main).value | |||
# print(new_main) | |||
expected_output = container.DataFrame({'d3mIndex': [0, 1, 2, 3], | |||
'timestamp': [1., 2., 3., 4.], | |||
@@ -124,6 +124,67 @@ class ContinuityValidationTest(unittest.TestCase): | |||
self._test_continuity(new_main) | |||
hyperparams = hyperparams_class.defaults() | |||
hyperparams = hyperparams.replace({'continuity_option': 'ablation'}) | |||
primitive2 = ContinuityValidation.ContinuityValidation(hyperparams=hyperparams) | |||
new_main2 = primitive2.produce(inputs=main).value | |||
print(new_main2) | |||
self.assertEqual(utils.to_json_structure(new_main2.metadata.to_internal_simple_structure()), [{ | |||
'selector': [], | |||
'metadata': { | |||
# 'top_level': 'main', | |||
'schema': metadata_base.CONTAINER_SCHEMA_VERSION, | |||
'structural_type': 'd3m.container.pandas.DataFrame', | |||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], | |||
'dimension': { | |||
'name': 'rows', | |||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], | |||
'length': 2, | |||
}, | |||
}, | |||
}, { | |||
'selector': ['__ALL_ELEMENTS__'], | |||
'metadata': { | |||
'dimension': { | |||
'name': 'columns', | |||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], | |||
'length': 5, | |||
}, | |||
}, | |||
}, { | |||
'selector': ['__ALL_ELEMENTS__', 0], | |||
'metadata': { | |||
'name': 'd3mIndex', | |||
'structural_type': 'numpy.int64', | |||
}, | |||
}, { | |||
'selector': ['__ALL_ELEMENTS__', 1], | |||
'metadata': { | |||
'name': 'timestamp', | |||
'structural_type': 'numpy.float64', | |||
}, | |||
}, { | |||
'selector': ['__ALL_ELEMENTS__', 2], | |||
'metadata': { | |||
'name': 'a', | |||
'structural_type': 'numpy.float64', | |||
}, | |||
}, { | |||
'selector': ['__ALL_ELEMENTS__', 3], | |||
'metadata': { | |||
'name': 'b', | |||
'structural_type': 'numpy.float64', | |||
}, | |||
}, { | |||
'selector': ['__ALL_ELEMENTS__', 4], | |||
'metadata': { | |||
'name': 'ground_truth', | |||
'structural_type': 'numpy.int64', | |||
}, | |||
}]) | |||
def _test_continuity(self, data_value): | |||
tmp_col = data_value['timestamp'] | |||
@@ -102,6 +102,54 @@ class DuplicationValidationTest(unittest.TestCase): | |||
self._test_drop_duplication(new_main) | |||
hyperparams = hyperparams_class.defaults() | |||
hyperparams = hyperparams.replace({'keep_option': 'average'}) | |||
primitive2 = DuplicationValidation.DuplicationValidation(hyperparams=hyperparams) | |||
new_main2 = primitive2.produce(inputs=main).value | |||
print(new_main2) | |||
self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{ | |||
'selector': [], | |||
'metadata': { | |||
# 'top_level': 'main', | |||
'schema': metadata_base.CONTAINER_SCHEMA_VERSION, | |||
'structural_type': 'd3m.container.pandas.DataFrame', | |||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], | |||
'dimension': { | |||
'name': 'rows', | |||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], | |||
'length': 2, | |||
}, | |||
}, | |||
}, { | |||
'selector': ['__ALL_ELEMENTS__'], | |||
'metadata': { | |||
'dimension': { | |||
'name': 'columns', | |||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], | |||
'length': 3, | |||
}, | |||
}, | |||
}, { | |||
'selector': ['__ALL_ELEMENTS__', 0], | |||
'metadata': { | |||
'name': 'timestamp', | |||
'structural_type': 'numpy.float64', | |||
}, | |||
}, { | |||
'selector': ['__ALL_ELEMENTS__', 1], | |||
'metadata': { | |||
'name': 'a', | |||
'structural_type': 'numpy.float64', | |||
}, | |||
}, { | |||
'selector': ['__ALL_ELEMENTS__', 2], | |||
'metadata': { | |||
'name': 'b', | |||
'structural_type': 'numpy.float64', | |||
}, | |||
}]) | |||
def _test_drop_duplication(self, data_value): | |||
self.assertEqual(True in list(data_value.duplicated('timestamp')), False) | |||