fixed merge conflict on BKFilter Former-commit-id:master268b810550
[formerlyc533dc41f0
] [formerly4b7350332a
[formerly91a1dfa29a
]] [formerlyf0ac3f1dfc
[formerly75cc47630f
] [formerly97c0ab1f2c
[formerly244269481f
]]] [formerly407f577123
[formerlya5bea99500
] [formerlyd90dd7a5df
[formerlyf3f7ff9cbe
]] [formerlyf6b79ee96a
[formerly2b0eaf8f26
] [formerlydcb34f9924
[formerlycbcac31a7c
]]]] [formerly276805fd92
[formerly3ffa9cc927
] [formerly11750432d5
[formerly63492199be
]] [formerlyf225a4d340
[formerly34ffc73c25
] [formerlycfe8553fb1
[formerly52b80cba4d
]]] [formerly2045f00d84
[formerlyeb41c739de
] [formerlybae9207571
[formerly7de12dda8a
]] [formerly6c97b93ebc
[formerly5a75188044
] [formerly5f0d45454c
[formerly685cdb979b
]]]]] [formerlydde7e94bb7
[formerlyf6b5266236
] [formerly8cf5ebc7a8
[formerly5c9dd9ae27
]] [formerly8dd7e6467a
[formerly5bd4d0fb0b
] [formerly916e0b6eca
[formerly41e55088dc
]]] [formerlyfb04845556
[formerlyd8b444e0ed
] [formerlyaec60bbd3e
[formerly8d1c160a9a
]] [formerly1943bcb1c9
[formerly2d3cc48fc4
] [formerly58e75575a6
[formerly458469ece2
]]]] [formerlyaa010466ae
[formerlydf086a293d
] [formerly813ce80e0a
[formerlyfb3a25caf0
]] [formerly3b85298123
[formerly148fea965b
] [formerly4ac043cac1
[formerly47dead1fad
]]] [formerly6c799a58ff
[formerlyc0b13c8e50
] [formerly9a4bd12e2e
[formerlyc0dd11a023
]] [formerly1f8c1e3bca
[formerly04db072fa0
] [formerly01d0d8d6a4
[formerly52c56a7453
]]]]]] Former-commit-id:28c0c1bc62
[formerlyca095e2fa2
] [formerly7c73154b90
[formerly22ba8c7e21
]] [formerly56b42d344d
[formerly0deec139cb
] [formerly140524a81c
[formerlye05dfbd701
]]] [formerlyff57ce3469
[formerly8bd9bebc7d
] [formerly5eece021d1
[formerlyab504afde5
]] [formerly16414f8e84
[formerly427bec8abb
] [formerlya92733bafe
[formerly7f9779e073
]]]] [formerly7c47a5ecf4
[formerly003210baa6
] [formerly7235d0ecbb
[formerlyf1d9089804
]] [formerlyaf042a5779
[formerlyd93b0274af
] [formerly651c0e5913
[formerly80a23c8a08
]]] [formerlyc0360a3f39
[formerlye860588e5c
] [formerly92cd46b9b2
[formerly1bb0313b8c
]] [formerlye8cf2fc680
[formerlydaeb7f052e
] [formerly01d0d8d6a4
]]]] Former-commit-id:5a14b9b6c9
[formerlyf6940aff9f
] [formerlyb07d1f51e9
[formerlydad55b1667
]] [formerly2e97a29a05
[formerlyc507fda4e3
] [formerlybca0c8f763
[formerlyeb8363bd42
]]] [formerlya2c98357bc
[formerly9f9de7fa69
] [formerlya0464b7de6
[formerly3766130a24
]] [formerly3ea9371a48
[formerlyb207c05add
] [formerly45043829a6
[formerly34286e1eca
]]]] Former-commit-id:20546a8523
[formerly4c4a4df332
] [formerly1966c0ef91
[formerlyfa8e5393cf
]] [formerlyef2311fcb0
[formerly0c3554978e
] [formerly3399a89945
[formerly4a80a0961a
]]] Former-commit-id:d3d5bd6ddc
[formerlya2ecdd5a7a
] [formerlyf13021e449
[formerly1a182c95e1
]] Former-commit-id:e441e5c7f0
[formerlyeec58f1502
] Former-commit-id:45972a4aa3
@@ -0,0 +1 @@ | |||||
Subproject commit af54e6970476a081bf0cd65990c9f56a1200d8a2 |
@@ -0,0 +1 @@ | |||||
Subproject commit 046b20d2f6d4543dcbe18f0a1d4bcbb1f61cf518 |
@@ -0,0 +1 @@ | |||||
Subproject commit 70aeefed6b7307941581357c4b7858bb3f88e1da |
@@ -170,9 +170,3 @@ class ContinuityValidation(transformer.TransformerPrimitiveBase[Inputs, Outputs, | |||||
inputs['d3mIndex'] = list(range(inputs.shape[0])) | inputs['d3mIndex'] = list(range(inputs.shape[0])) | ||||
return inputs | return inputs | ||||
def _write(self, inputs:Inputs): | |||||
""" | |||||
write inputs to current directory, only for test | |||||
""" | |||||
inputs.to_csv(str(time.time())+'.csv') |
@@ -91,8 +91,3 @@ class DuplicationValidation(transformer.TransformerPrimitiveBase[Inputs, Outputs | |||||
return inputs | return inputs | ||||
def _write(self, inputs:Inputs): | |||||
""" | |||||
write inputs to current directory, only for test | |||||
""" | |||||
inputs.to_csv(str(time.time())+'.csv') |
@@ -169,7 +169,7 @@ class LSTMOutlierDetector(CollectiveBaseDetector): | |||||
# print(danger_coefficient, averaged_relative_error) | # print(danger_coefficient, averaged_relative_error) | ||||
else: | |||||
else: # pragma: no cover | |||||
danger_coefficient = np.zeros(relative_error.shape) | danger_coefficient = np.zeros(relative_error.shape) | ||||
averaged_relative_error = np.zeros(relative_error.shape) | averaged_relative_error = np.zeros(relative_error.shape) | ||||
@@ -210,7 +210,7 @@ class LSTMOutlierDetector(CollectiveBaseDetector): | |||||
if __name__ == "__main__": | |||||
if __name__ == "__main__": # pragma: no cover | |||||
X_train = np.asarray( | X_train = np.asarray( | ||||
[3., 4., 8., 16, 18, 13., 22., 36., 59., 128, 62, 67, 78, 100]).reshape(-1, 1) | [3., 4., 8., 16, 18, 13., 22., 36., 59., 128, 62, 67, 78, 100]).reshape(-1, 1) | ||||
@@ -0,0 +1,376 @@ | |||||
from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple | |||||
from numpy import ndarray | |||||
from collections import OrderedDict | |||||
from scipy import sparse | |||||
import os | |||||
import sklearn | |||||
import numpy | |||||
import typing | |||||
import time | |||||
from d3m import container | |||||
from d3m.primitive_interfaces import base, transformer | |||||
from d3m.metadata import base as metadata_base, hyperparams | |||||
from d3m.container.numpy import ndarray as d3m_ndarray | |||||
from d3m.container import DataFrame as d3m_dataframe | |||||
from d3m.metadata import hyperparams, params, base as metadata_base | |||||
from d3m import utils | |||||
from d3m.base import utils as base_utils | |||||
from d3m.exceptions import PrimitiveNotFittedError | |||||
from d3m.primitive_interfaces.base import CallResult, DockerContainer | |||||
import os.path | |||||
import time | |||||
import statsmodels.api as sm | |||||
__all__ = ('BKFilter',) | |||||
Inputs = container.DataFrame | |||||
Outputs = container.DataFrame | |||||
class Hyperparams(hyperparams.Hyperparams): | |||||
# Tuning | |||||
low = hyperparams.UniformInt( | |||||
lower=0, | |||||
upper=100000000, | |||||
default=6, | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], | |||||
description="Minimum period for oscillations, ie., Baxter and King suggest that the Burns-Mitchell U.S. business cycle has 6 for quarterly data and 1.5 for annual data.", | |||||
) | |||||
high = hyperparams.UniformInt( | |||||
lower=0, | |||||
upper=100000000, | |||||
default=32, | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], | |||||
description="Maximum period for oscillations BK suggest that the U.S. business cycle has 32 for quarterly data and 8 for annual data.", | |||||
) | |||||
K = hyperparams.UniformInt( | |||||
lower=0, | |||||
upper=100000000, | |||||
default=1, | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], | |||||
description="Lead-lag length of the filter. Baxter and King propose a truncation length of 12 for quarterly data and 3 for annual data.", | |||||
) | |||||
# Control | |||||
columns_using_method= hyperparams.Enumeration( | |||||
values=['name', 'index'], | |||||
default='index', | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||||
description="Choose to use columns by names or indecies. If 'name', \"use_columns\" or \"exclude_columns\" is used. If 'index', \"use_columns_name\" or \"exclude_columns_name\" is used." | |||||
) | |||||
use_columns_name = hyperparams.Set( | |||||
elements=hyperparams.Hyperparameter[str](''), | |||||
default=(), | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||||
description="A set of column names to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", | |||||
) | |||||
exclude_columns_name = hyperparams.Set( | |||||
elements=hyperparams.Hyperparameter[str](''), | |||||
default=(), | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||||
description="A set of column names to not operate on. Applicable only if \"use_columns_name\" is not provided.", | |||||
) | |||||
use_columns = hyperparams.Set( | |||||
elements=hyperparams.Hyperparameter[int](-1), | |||||
default=(), | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||||
description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", | |||||
) | |||||
exclude_columns = hyperparams.Set( | |||||
elements=hyperparams.Hyperparameter[int](-1), | |||||
default=(), | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||||
description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", | |||||
) | |||||
return_result = hyperparams.Enumeration( | |||||
values=['append', 'replace', 'new'], | |||||
default='append', | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||||
description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", | |||||
) | |||||
use_semantic_types = hyperparams.UniformBool( | |||||
default=False, | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||||
description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" | |||||
) | |||||
add_index_columns = hyperparams.UniformBool( | |||||
default=False, | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||||
description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", | |||||
) | |||||
error_on_no_input = hyperparams.UniformBool( | |||||
default=True, | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], | |||||
description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", | |||||
) | |||||
return_semantic_type = hyperparams.Enumeration[str]( | |||||
values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], | |||||
default='https://metadata.datadrivendiscovery.org/types/Attribute', | |||||
description='Decides what semantic type to attach to generated attributes', | |||||
semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] | |||||
) | |||||
class BKFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||||
""" | |||||
Filter a time series using the Baxter-King bandpass filter. | |||||
Parameters | |||||
---------- | |||||
low: int | |||||
Minimum period for oscillations, ie., Baxter and King suggest that the Burns-Mitchell U.S. business cycle has 6 for quarterly data and 1.5 for annual data. | |||||
high: int | |||||
Maximum period for oscillations BK suggest that the U.S. business cycle has 32 for quarterly data and 8 for annual data. | |||||
K: int | |||||
Lead-lag length of the filter. Baxter and King propose a truncation length of 12 for quarterly data and 3 for annual data. | |||||
use_columns: Set | |||||
A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped. | |||||
exclude_columns: Set | |||||
A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided. | |||||
return_result: Enumeration | |||||
Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false. | |||||
use_semantic_types: Bool | |||||
Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe. | |||||
add_index_columns: Bool | |||||
Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\". | |||||
error_on_no_input: Bool( | |||||
Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False. | |||||
return_semantic_type: Enumeration[str]( | |||||
Decides what semantic type to attach to generated attributes' | |||||
""" | |||||
__author__: "DATA Lab at Texas A&M University" | |||||
metadata = metadata_base.PrimitiveMetadata({ | |||||
"name": "Baxter-King Filter Primitive", | |||||
"python_path": "d3m.primitives.tods.feature_analysis.bk_filter", | |||||
"source": {'name': 'DATA Lab at Texas A&M University', 'contact': 'mailto:khlai037@tamu.edu', | |||||
'uris': ['https://gitlab.com/lhenry15/tods.git', 'https://gitlab.com/lhenry15/tods/-/blob/Junjie/anomaly-primitives/anomaly_primitives/DuplicationValidation.py']}, | |||||
"algorithm_types": [metadata_base.PrimitiveAlgorithmType.BK_FILTER,], | |||||
"primitive_family": metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION, | |||||
"id": "b2bfadc5-dbca-482c-b188-8585e5f245c4", | |||||
"hyperparams_to_tune": ['low', 'high', 'K'], | |||||
"version": "0.0.1", | |||||
}) | |||||
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: | |||||
""" | |||||
Process the testing data. | |||||
Args: | |||||
inputs: Container DataFrame. | |||||
Returns: | |||||
Container DataFrame after BKFilter. | |||||
""" | |||||
# Get cols to fit. | |||||
self._fitted = False | |||||
self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams) | |||||
self._input_column_names = self._training_inputs.columns | |||||
if len(self._training_indices) > 0: | |||||
# self._clf.fit(self._training_inputs) | |||||
self._fitted = True | |||||
else: | |||||
if self.hyperparams['error_on_no_input']: | |||||
raise RuntimeError("No input columns were selected") | |||||
self.logger.warn("No input columns were selected") | |||||
if not self._fitted: | |||||
raise PrimitiveNotFittedError("Primitive not fitted.") | |||||
sk_inputs = inputs | |||||
if self.hyperparams['use_semantic_types']: | |||||
sk_inputs = inputs.iloc[:, self._training_indices] | |||||
output_columns = [] | |||||
if len(self._training_indices) > 0: | |||||
sk_output = self._bkfilter(sk_inputs, low=self.hyperparams['low'], high=self.hyperparams['high'], K=self.hyperparams['K']) | |||||
if sparse.issparse(sk_output): | |||||
sk_output = sk_output.toarray() | |||||
outputs = self._wrap_predictions(inputs, sk_output) | |||||
if len(outputs.columns) == len(self._input_column_names): | |||||
outputs.columns = self._input_column_names | |||||
output_columns = [outputs] | |||||
else: | |||||
if self.hyperparams['error_on_no_input']: | |||||
raise RuntimeError("No input columns were selected") | |||||
self.logger.warn("No input columns were selected") | |||||
outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], | |||||
add_index_columns=self.hyperparams['add_index_columns'], | |||||
inputs=inputs, column_indices=self._training_indices, | |||||
columns_list=output_columns) | |||||
# self._write(outputs) | |||||
# self.logger.warning('produce was called3') | |||||
return CallResult(outputs) | |||||
@classmethod | |||||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): | |||||
""" | |||||
Select columns to fit. | |||||
Args: | |||||
inputs: Container DataFrame | |||||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
Returns: | |||||
list | |||||
""" | |||||
if not hyperparams['use_semantic_types']: | |||||
return inputs, list(range(len(inputs.columns))) | |||||
inputs_metadata = inputs.metadata | |||||
def can_produce_column(column_index: int) -> bool: | |||||
return cls._can_produce_column(inputs_metadata, column_index, hyperparams) | |||||
use_columns = [] | |||||
exclude_columns = [] | |||||
# if hyperparams['columns_using_method'] == 'name': | |||||
# inputs_cols = inputs.columns.values.tolist() | |||||
# for i in range(len(inputs_cols)): | |||||
# if inputs_cols[i] in hyperparams['use_columns_name']: | |||||
# use_columns.append(i) | |||||
# elif inputs_cols[i] in hyperparams['exclude_columns_name']: | |||||
# exclude_columns.append(i) | |||||
# else: | |||||
use_columns=hyperparams['use_columns'] | |||||
exclude_columns=hyperparams['exclude_columns'] | |||||
columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, use_columns=use_columns, exclude_columns=exclude_columns, can_use_column=can_produce_column) | |||||
return inputs.iloc[:, columns_to_produce], columns_to_produce | |||||
# return columns_to_produce | |||||
@classmethod | |||||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: | |||||
""" | |||||
Output whether a column can be processed. | |||||
Args: | |||||
inputs_metadata: d3m.metadata.base.DataMetadata | |||||
column_index: int | |||||
Returns: | |||||
bool | |||||
""" | |||||
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) | |||||
accepted_structural_types = (int, float, numpy.integer, numpy.float64) | |||||
accepted_semantic_types = set() | |||||
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") | |||||
if not issubclass(column_metadata['structural_type'], accepted_structural_types): | |||||
return False | |||||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||||
if len(semantic_types) == 0: | |||||
cls.logger.warning("No semantic types found in column metadata") | |||||
return False | |||||
# Making sure all accepted_semantic_types are available in semantic_types | |||||
if len(accepted_semantic_types - semantic_types) == 0: | |||||
return True | |||||
return False | |||||
@classmethod | |||||
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | |||||
target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: | |||||
""" | |||||
Updata metadata for selected columns. | |||||
Args: | |||||
inputs_metadata: metadata_base.DataMetadata | |||||
outputs: Container Dataframe | |||||
target_columns_metadata: list | |||||
Returns: | |||||
d3m.metadata.base.DataMetadata | |||||
""" | |||||
outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) | |||||
for column_index, column_metadata in enumerate(target_columns_metadata): | |||||
column_metadata.pop("structural_type", None) | |||||
outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) | |||||
return outputs_metadata | |||||
def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: | |||||
""" | |||||
Wrap predictions into dataframe | |||||
Args: | |||||
inputs: Container Dataframe | |||||
predictions: array-like data (n_samples, n_features) | |||||
Returns: | |||||
Dataframe | |||||
""" | |||||
outputs = d3m_dataframe(predictions, generate_metadata=True) | |||||
target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams) | |||||
outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) | |||||
return outputs | |||||
@classmethod | |||||
def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams): | |||||
""" | |||||
Add target columns metadata | |||||
Args: | |||||
outputs_metadata: metadata.base.DataMetadata | |||||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
Returns: | |||||
List[OrderedDict] | |||||
""" | |||||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||||
target_columns_metadata: List[OrderedDict] = [] | |||||
for column_index in range(outputs_length): | |||||
column_name = "output_{}".format(column_index) | |||||
column_metadata = OrderedDict() | |||||
semantic_types = set() | |||||
semantic_types.add(hyperparams["return_semantic_type"]) | |||||
column_metadata['semantic_types'] = list(semantic_types) | |||||
column_metadata["name"] = str(column_name) | |||||
target_columns_metadata.append(column_metadata) | |||||
return target_columns_metadata | |||||
def _write(self, inputs:Inputs): | |||||
inputs.to_csv(str(time.time())+'.csv') | |||||
def _bkfilter(self, X, low, high, K): | |||||
""" | |||||
Perform BKFilter | |||||
Args: | |||||
X: slected rows to be performed | |||||
K, low, high: Parameters of BKFilter | |||||
Returns: | |||||
Dataframe, results of BKFilter | |||||
""" | |||||
transformed_X = utils.pandas.DataFrame() | |||||
for col in X.columns: | |||||
cycle = sm.tsa.filters.bkfilter(X[col], low=low, high=high, K=K) | |||||
cycle_df = utils.pandas.DataFrame(cycle) | |||||
transformed_X = utils.pandas.concat([transformed_X,cycle_df], axis=1) | |||||
return transformed_X |
@@ -163,14 +163,14 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams | |||||
if len(self._training_indices) > 0: | if len(self._training_indices) > 0: | ||||
# self._clf.fit(self._training_inputs) | # self._clf.fit(self._training_inputs) | ||||
self._fitted = True | self._fitted = True | ||||
else: | |||||
else: # pragma: no cover | |||||
if self.hyperparams['error_on_no_input']: | if self.hyperparams['error_on_no_input']: | ||||
raise RuntimeError("No input columns were selected") | raise RuntimeError("No input columns were selected") | ||||
self.logger.warn("No input columns were selected") | self.logger.warn("No input columns were selected") | ||||
if not self._fitted: | |||||
if not self._fitted: # pragma: no cover | |||||
raise PrimitiveNotFittedError("Primitive not fitted.") | raise PrimitiveNotFittedError("Primitive not fitted.") | ||||
sk_inputs = inputs | sk_inputs = inputs | ||||
if self.hyperparams['use_semantic_types']: | if self.hyperparams['use_semantic_types']: | ||||
@@ -186,7 +186,7 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams | |||||
outputs.columns = self._input_column_names | outputs.columns = self._input_column_names | ||||
output_columns = [outputs] | output_columns = [outputs] | ||||
else: | |||||
else: # pragma: no cover | |||||
if self.hyperparams['error_on_no_input']: | if self.hyperparams['error_on_no_input']: | ||||
raise RuntimeError("No input columns were selected") | raise RuntimeError("No input columns were selected") | ||||
self.logger.warn("No input columns were selected") | self.logger.warn("No input columns were selected") | ||||
@@ -194,14 +194,11 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams | |||||
add_index_columns=self.hyperparams['add_index_columns'], | add_index_columns=self.hyperparams['add_index_columns'], | ||||
inputs=inputs, column_indices=self._training_indices, | inputs=inputs, column_indices=self._training_indices, | ||||
columns_list=output_columns) | columns_list=output_columns) | ||||
# self._write(outputs) | |||||
# self.logger.warning('produce was called3') | |||||
return CallResult(outputs) | return CallResult(outputs) | ||||
@classmethod | @classmethod | ||||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): | |||||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover | |||||
""" | """ | ||||
Select columns to fit. | Select columns to fit. | ||||
Args: | Args: | ||||
@@ -238,7 +235,7 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams | |||||
# return columns_to_produce | # return columns_to_produce | ||||
@classmethod | @classmethod | ||||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: | |||||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover | |||||
""" | """ | ||||
Output whether a column can be processed. | Output whether a column can be processed. | ||||
Args: | Args: | ||||
@@ -331,9 +328,6 @@ class HPFilter(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams | |||||
return target_columns_metadata | return target_columns_metadata | ||||
def _write(self, inputs:Inputs): | |||||
inputs.to_csv(str(time.time())+'.csv') | |||||
def _hpfilter(self, X, lamb): | def _hpfilter(self, X, lamb): | ||||
""" | """ | ||||
Perform HPFilter | Perform HPFilter | ||||
@@ -224,7 +224,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||||
Returns: | Returns: | ||||
None | None | ||||
""" | """ | ||||
if self._fitted: | |||||
if self._fitted: # pragma: no cover | |||||
return CallResult(None) | return CallResult(None) | ||||
# Get cols to fit. | # Get cols to fit. | ||||
@@ -239,7 +239,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||||
if len(self._training_indices) > 0: | if len(self._training_indices) > 0: | ||||
self._clf.fit(self._training_inputs) | self._clf.fit(self._training_inputs) | ||||
self._fitted = True | self._fitted = True | ||||
else: | |||||
else: # pragma: no cover | |||||
if self.hyperparams['error_on_no_input']: | if self.hyperparams['error_on_no_input']: | ||||
raise RuntimeError("No input columns were selected") | raise RuntimeError("No input columns were selected") | ||||
self.logger.warn("No input columns were selected") | self.logger.warn("No input columns were selected") | ||||
@@ -257,7 +257,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||||
# self.logger.warning(str(self.metadata.query()['name'])) | # self.logger.warning(str(self.metadata.query()['name'])) | ||||
if not self._fitted: | |||||
if not self._fitted: # pragma: no cover | |||||
raise PrimitiveNotFittedError("Primitive not fitted.") | raise PrimitiveNotFittedError("Primitive not fitted.") | ||||
sk_inputs = inputs | sk_inputs = inputs | ||||
if self.hyperparams['use_semantic_types']: | if self.hyperparams['use_semantic_types']: | ||||
@@ -272,7 +272,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||||
if len(outputs.columns) == len(self._input_column_names): | if len(outputs.columns) == len(self._input_column_names): | ||||
outputs.columns = self._input_column_names | outputs.columns = self._input_column_names | ||||
output_columns = [outputs] | output_columns = [outputs] | ||||
else: | |||||
else: # pragma: no cover | |||||
if self.hyperparams['error_on_no_input']: | if self.hyperparams['error_on_no_input']: | ||||
raise RuntimeError("No input columns were selected") | raise RuntimeError("No input columns were selected") | ||||
self.logger.warn("No input columns were selected") | self.logger.warn("No input columns were selected") | ||||
@@ -286,7 +286,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||||
return CallResult(outputs) | return CallResult(outputs) | ||||
def get_params(self) -> Params: | |||||
def get_params(self) -> Params: # pragma: no cover | |||||
""" | """ | ||||
Return parameters. | Return parameters. | ||||
Args: | Args: | ||||
@@ -320,7 +320,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||||
target_columns_metadata_=self._target_columns_metadata | target_columns_metadata_=self._target_columns_metadata | ||||
) | ) | ||||
def set_params(self, *, params: Params) -> None: | |||||
def set_params(self, *, params: Params) -> None: # pragma: no cover | |||||
""" | """ | ||||
Set parameters for SKTruncatedSVD. | Set parameters for SKTruncatedSVD. | ||||
Args: | Args: | ||||
@@ -351,7 +351,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||||
@classmethod | @classmethod | ||||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): | |||||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover | |||||
""" | """ | ||||
Select columns to fit. | Select columns to fit. | ||||
Args: | Args: | ||||
@@ -377,7 +377,7 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||||
# return columns_to_produce | # return columns_to_produce | ||||
@classmethod | @classmethod | ||||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: | |||||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover | |||||
""" | """ | ||||
Output whether a column can be processed. | Output whether a column can be processed. | ||||
Args: | Args: | ||||
@@ -408,35 +408,35 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||||
return False | return False | ||||
@classmethod | |||||
def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: | |||||
""" | |||||
Output metadata of selected columns. | |||||
Args: | |||||
outputs_metadata: metadata_base.DataMetadata | |||||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
Returns: | |||||
d3m.metadata.base.DataMetadata | |||||
""" | |||||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||||
target_columns_metadata: List[OrderedDict] = [] | |||||
for column_index in range(outputs_length): | |||||
column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) | |||||
# Update semantic types and prepare it for predicted targets. | |||||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||||
semantic_types_to_remove = set([]) | |||||
add_semantic_types = [] | |||||
add_semantic_types.add(hyperparams["return_semantic_type"]) | |||||
semantic_types = semantic_types - semantic_types_to_remove | |||||
semantic_types = semantic_types.union(add_semantic_types) | |||||
column_metadata['semantic_types'] = list(semantic_types) | |||||
target_columns_metadata.append(column_metadata) | |||||
return target_columns_metadata | |||||
# @classmethod | |||||
# def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: | |||||
# """ | |||||
# Output metadata of selected columns. | |||||
# Args: | |||||
# outputs_metadata: metadata_base.DataMetadata | |||||
# hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
# Returns: | |||||
# d3m.metadata.base.DataMetadata | |||||
# """ | |||||
# outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||||
# target_columns_metadata: List[OrderedDict] = [] | |||||
# for column_index in range(outputs_length): | |||||
# column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) | |||||
# # Update semantic types and prepare it for predicted targets. | |||||
# semantic_types = set(column_metadata.get('semantic_types', [])) | |||||
# semantic_types_to_remove = set([]) | |||||
# add_semantic_types = [] | |||||
# add_semantic_types.add(hyperparams["return_semantic_type"]) | |||||
# semantic_types = semantic_types - semantic_types_to_remove | |||||
# semantic_types = semantic_types.union(add_semantic_types) | |||||
# column_metadata['semantic_types'] = list(semantic_types) | |||||
# target_columns_metadata.append(column_metadata) | |||||
# return target_columns_metadata | |||||
@classmethod | @classmethod | ||||
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | ||||
@@ -500,11 +500,3 @@ class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, H | |||||
return target_columns_metadata | return target_columns_metadata | ||||
def _write(self, inputs:Inputs): | |||||
""" | |||||
write inputs to current directory, only for test | |||||
""" | |||||
inputs.to_csv(str(time.time())+'.csv') | |||||
# SKTruncatedSVD.__doc__ = TruncatedSVD.__doc__ |
@@ -276,14 +276,14 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||||
if len(self._training_indices) > 0: | if len(self._training_indices) > 0: | ||||
self._clf.fit(self._training_inputs) | self._clf.fit(self._training_inputs) | ||||
self._fitted = True | self._fitted = True | ||||
else: | |||||
else: # pragma: no cover | |||||
if self.hyperparams['error_on_no_input']: | if self.hyperparams['error_on_no_input']: | ||||
raise RuntimeError("No input columns were selected") | raise RuntimeError("No input columns were selected") | ||||
self.logger.warn("No input columns were selected") | self.logger.warn("No input columns were selected") | ||||
if not self._fitted: | |||||
if not self._fitted: # pragma: no cover | |||||
raise PrimitiveNotFittedError("Primitive not fitted.") | raise PrimitiveNotFittedError("Primitive not fitted.") | ||||
sk_inputs = inputs | sk_inputs = inputs | ||||
@@ -301,7 +301,7 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||||
if len(outputs.columns) == len(self._input_column_names): | if len(outputs.columns) == len(self._input_column_names): | ||||
outputs.columns = self._input_column_names | outputs.columns = self._input_column_names | ||||
output_columns = [outputs] | output_columns = [outputs] | ||||
else: | |||||
else: # pragma: no cover | |||||
if self.hyperparams['error_on_no_input']: | if self.hyperparams['error_on_no_input']: | ||||
raise RuntimeError("No input columns were selected") | raise RuntimeError("No input columns were selected") | ||||
self.logger.warn("No input columns were selected") | self.logger.warn("No input columns were selected") | ||||
@@ -316,7 +316,7 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||||
@classmethod | @classmethod | ||||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): | |||||
def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): # pragma: no cover | |||||
""" | """ | ||||
Select columns to fit. | Select columns to fit. | ||||
Args: | Args: | ||||
@@ -342,7 +342,7 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||||
# return columns_to_produce | # return columns_to_produce | ||||
@classmethod | @classmethod | ||||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: | |||||
def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: # pragma: no cover | |||||
""" | """ | ||||
Output whether a column can be processed. | Output whether a column can be processed. | ||||
Args: | Args: | ||||
@@ -373,35 +373,35 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||||
return False | return False | ||||
@classmethod | |||||
def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: | |||||
""" | |||||
Output metadata of selected columns. | |||||
Args: | |||||
outputs_metadata: metadata_base.DataMetadata | |||||
hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
Returns: | |||||
d3m.metadata.base.DataMetadata | |||||
""" | |||||
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||||
target_columns_metadata: List[OrderedDict] = [] | |||||
for column_index in range(outputs_length): | |||||
column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) | |||||
# Update semantic types and prepare it for predicted targets. | |||||
semantic_types = set(column_metadata.get('semantic_types', [])) | |||||
semantic_types_to_remove = set([]) | |||||
add_semantic_types = [] | |||||
add_semantic_types.add(hyperparams["return_semantic_type"]) | |||||
semantic_types = semantic_types - semantic_types_to_remove | |||||
semantic_types = semantic_types.union(add_semantic_types) | |||||
column_metadata['semantic_types'] = list(semantic_types) | |||||
target_columns_metadata.append(column_metadata) | |||||
return target_columns_metadata | |||||
# @classmethod | |||||
# def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: | |||||
# """ | |||||
# Output metadata of selected columns. | |||||
# Args: | |||||
# outputs_metadata: metadata_base.DataMetadata | |||||
# hyperparams: d3m.metadata.hyperparams.Hyperparams | |||||
# Returns: | |||||
# d3m.metadata.base.DataMetadata | |||||
# """ | |||||
# outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] | |||||
# target_columns_metadata: List[OrderedDict] = [] | |||||
# for column_index in range(outputs_length): | |||||
# column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) | |||||
# # Update semantic types and prepare it for predicted targets. | |||||
# semantic_types = set(column_metadata.get('semantic_types', [])) | |||||
# semantic_types_to_remove = set([]) | |||||
# add_semantic_types = [] | |||||
# add_semantic_types.add(hyperparams["return_semantic_type"]) | |||||
# semantic_types = semantic_types - semantic_types_to_remove | |||||
# semantic_types = semantic_types.union(add_semantic_types) | |||||
# column_metadata['semantic_types'] = list(semantic_types) | |||||
# target_columns_metadata.append(column_metadata) | |||||
# return target_columns_metadata | |||||
@classmethod | @classmethod | ||||
def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], | ||||
@@ -465,12 +465,6 @@ class TRMF(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): | |||||
return target_columns_metadata | return target_columns_metadata | ||||
def _write(self, inputs:Inputs): | |||||
""" | |||||
write inputs to current directory, only for test | |||||
""" | |||||
inputs.to_csv(str(time.time())+'.csv') | |||||
""" | """ | ||||
Temporal Regularized Matrix Factorization | Temporal Regularized Matrix Factorization | ||||
@@ -564,7 +558,7 @@ class trmf: | |||||
return np.dot(self.F, X_preds) | return np.dot(self.F, X_preds) | ||||
def _predict_X(self, h): | |||||
def _predict_X(self, h): # pragma: no cover | |||||
"""Predict X h timepoints ahead. | """Predict X h timepoints ahead. | ||||
Evaluates matrix X with the help of matrix W. | Evaluates matrix X with the help of matrix W. | ||||
@@ -59,7 +59,7 @@ class ContinuityValidationTest(unittest.TestCase): | |||||
hyperparams_class = ContinuityValidation.ContinuityValidation.metadata.get_hyperparams() | hyperparams_class = ContinuityValidation.ContinuityValidation.metadata.get_hyperparams() | ||||
primitive = ContinuityValidation.ContinuityValidation(hyperparams=hyperparams_class.defaults()) | primitive = ContinuityValidation.ContinuityValidation(hyperparams=hyperparams_class.defaults()) | ||||
new_main = primitive.produce(inputs=main).value | new_main = primitive.produce(inputs=main).value | ||||
# print(new_main) | |||||
expected_output = container.DataFrame({'d3mIndex': [0, 1, 2, 3], | expected_output = container.DataFrame({'d3mIndex': [0, 1, 2, 3], | ||||
'timestamp': [1., 2., 3., 4.], | 'timestamp': [1., 2., 3., 4.], | ||||
@@ -124,6 +124,67 @@ class ContinuityValidationTest(unittest.TestCase): | |||||
self._test_continuity(new_main) | self._test_continuity(new_main) | ||||
hyperparams = hyperparams_class.defaults() | |||||
hyperparams = hyperparams.replace({'continuity_option': 'ablation'}) | |||||
primitive2 = ContinuityValidation.ContinuityValidation(hyperparams=hyperparams) | |||||
new_main2 = primitive2.produce(inputs=main).value | |||||
print(new_main2) | |||||
self.assertEqual(utils.to_json_structure(new_main2.metadata.to_internal_simple_structure()), [{ | |||||
'selector': [], | |||||
'metadata': { | |||||
# 'top_level': 'main', | |||||
'schema': metadata_base.CONTAINER_SCHEMA_VERSION, | |||||
'structural_type': 'd3m.container.pandas.DataFrame', | |||||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], | |||||
'dimension': { | |||||
'name': 'rows', | |||||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], | |||||
'length': 2, | |||||
}, | |||||
}, | |||||
}, { | |||||
'selector': ['__ALL_ELEMENTS__'], | |||||
'metadata': { | |||||
'dimension': { | |||||
'name': 'columns', | |||||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], | |||||
'length': 5, | |||||
}, | |||||
}, | |||||
}, { | |||||
'selector': ['__ALL_ELEMENTS__', 0], | |||||
'metadata': { | |||||
'name': 'd3mIndex', | |||||
'structural_type': 'numpy.int64', | |||||
}, | |||||
}, { | |||||
'selector': ['__ALL_ELEMENTS__', 1], | |||||
'metadata': { | |||||
'name': 'timestamp', | |||||
'structural_type': 'numpy.float64', | |||||
}, | |||||
}, { | |||||
'selector': ['__ALL_ELEMENTS__', 2], | |||||
'metadata': { | |||||
'name': 'a', | |||||
'structural_type': 'numpy.float64', | |||||
}, | |||||
}, { | |||||
'selector': ['__ALL_ELEMENTS__', 3], | |||||
'metadata': { | |||||
'name': 'b', | |||||
'structural_type': 'numpy.float64', | |||||
}, | |||||
}, { | |||||
'selector': ['__ALL_ELEMENTS__', 4], | |||||
'metadata': { | |||||
'name': 'ground_truth', | |||||
'structural_type': 'numpy.int64', | |||||
}, | |||||
}]) | |||||
def _test_continuity(self, data_value): | def _test_continuity(self, data_value): | ||||
tmp_col = data_value['timestamp'] | tmp_col = data_value['timestamp'] | ||||
@@ -102,6 +102,54 @@ class DuplicationValidationTest(unittest.TestCase): | |||||
self._test_drop_duplication(new_main) | self._test_drop_duplication(new_main) | ||||
hyperparams = hyperparams_class.defaults() | |||||
hyperparams = hyperparams.replace({'keep_option': 'average'}) | |||||
primitive2 = DuplicationValidation.DuplicationValidation(hyperparams=hyperparams) | |||||
new_main2 = primitive2.produce(inputs=main).value | |||||
print(new_main2) | |||||
self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{ | |||||
'selector': [], | |||||
'metadata': { | |||||
# 'top_level': 'main', | |||||
'schema': metadata_base.CONTAINER_SCHEMA_VERSION, | |||||
'structural_type': 'd3m.container.pandas.DataFrame', | |||||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], | |||||
'dimension': { | |||||
'name': 'rows', | |||||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], | |||||
'length': 2, | |||||
}, | |||||
}, | |||||
}, { | |||||
'selector': ['__ALL_ELEMENTS__'], | |||||
'metadata': { | |||||
'dimension': { | |||||
'name': 'columns', | |||||
'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], | |||||
'length': 3, | |||||
}, | |||||
}, | |||||
}, { | |||||
'selector': ['__ALL_ELEMENTS__', 0], | |||||
'metadata': { | |||||
'name': 'timestamp', | |||||
'structural_type': 'numpy.float64', | |||||
}, | |||||
}, { | |||||
'selector': ['__ALL_ELEMENTS__', 1], | |||||
'metadata': { | |||||
'name': 'a', | |||||
'structural_type': 'numpy.float64', | |||||
}, | |||||
}, { | |||||
'selector': ['__ALL_ELEMENTS__', 2], | |||||
'metadata': { | |||||
'name': 'b', | |||||
'structural_type': 'numpy.float64', | |||||
}, | |||||
}]) | |||||
def _test_drop_duplication(self, data_value): | def _test_drop_duplication(self, data_value): | ||||
self.assertEqual(True in list(data_value.duplicated('timestamp')), False) | self.assertEqual(True in list(data_value.duplicated('timestamp')), False) | ||||