@@ -1,99 +1,57 @@
from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
from collections import OrderedDict

import uuid

import numpy as np
import pandas as pd
from numpy import ndarray
from scipy import sparse

from d3m import container, utils as d3m_utils
from d3m.container import DataFrame as d3m_dataframe
from d3m.container.numpy import ndarray as d3m_ndarray
from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.primitive_interfaces.base import CallResult, DockerContainer

from .UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase
import stumpy

__all__ = ('MatrixProfile',)

Inputs = d3m_dataframe
Outputs = d3m_dataframe


class Params(Params_ODBase):
    ######## Add more Attributes #######
    pass


class Hyperparams(Hyperparams_ODBase):
    ######## Add more Attributes #######
    # window_size stays on this subclass so that __init__ below can read
    # hyperparams['window_size'].
    window_size = hyperparams.UniformInt(
        lower=0,
        upper=100,  # TODO: define the correct upper bound
        default=50,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="The moving window size used to compute the matrix profile."
    )


class MP:
    """
    This is the class for the matrix profile function.
    """
    def __init__(self, window_size):
        self._window_size = window_size

    def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.
        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # The matrix profile needs no training, so fitting is a no-op kept for
        # API consistency; input validation could be enabled here if needed.
        # X = check_array(X)
        # self._set_n_classes(y)
        # self.decision_scores_ = self.decision_function(X)
        # self._process_decision_scores()
        return self

    def produce(self, data):
        """
        Args:
            data: numpy array (the input from UODBasePrimitive is an ndarray,
                not a DataFrame)
        Returns:
            DataFrame with four matrix-profile columns per input series
        """
        transformed_columns = pd.DataFrame()
        # NOTE: iterating a 2-D ndarray yields rows; if per-column profiles
        # are intended, iterate over data.T instead.
        for col in data:
            output = stumpy.stump(col, m=self._window_size)
            output = pd.DataFrame(output)
            transformed_columns = pd.concat([transformed_columns, output])
        return transformed_columns

    def predict(self, data):
        return self.produce(data)


class MatrixProfile(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
    """
    A primitive that performs matrix profile on a DataFrame using the Stumpy package.
    Stumpy documentation: https://stumpy.readthedocs.io/en/latest/index.html

    Parameters
    ----------
    T_A : ndarray
        The time series or sequence for which to compute the matrix profile
    m : int
        Window size
    T_B : ndarray
        The time series or sequence that contain your query subsequences
        of interest. Default is `None` which corresponds to a self-join.
    ignore_trivial : bool
        Set to `True` if this is a self-join. Otherwise, for AB-join, set this
        to `False`. Default is `True`.

    Returns
    -------
    out : ndarray
        The first column consists of the matrix profile, the second column
        consists of the matrix profile indices, the third column consists of
        the left matrix profile indices, and the fourth column consists of
        the right matrix profile indices.
    """

    metadata = metadata_base.PrimitiveMetadata({
        '__author__': "DATA Lab @ Texas A&M University",
        'name': "Matrix Profile",
        'python_path': 'd3m.primitives.tods.detection_algorithm.matrix_profile',
        'source': {'name': "DATA Lab @ Texas A&M University", 'contact': 'mailto:khlai037@tamu.edu',
                   'uris': ['https://gitlab.com/lhenry15/tods/-/blob/Yile/anomaly-primitives/anomaly_primitives/MatrixProfile.py']},
        'algorithm_types': [metadata_base.PrimitiveAlgorithmType.MATRIX_PROFILE, ],
        'primitive_family': metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION,
        'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'MatrixProfilePrimitive')),
        'hyperparams_to_tune': ['window_size'],
        'version': '0.0.2',
    })

    def __init__(self, *,
                 hyperparams: Hyperparams,
                 random_seed: int = 0,
                 docker_containers: Dict[str, DockerContainer] = None) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

        self._clf = MP(window_size=hyperparams['window_size'])

    def set_training_data(self, *, inputs: Inputs) -> None:
        """
        Set training data for outlier detection.
        Args:
            inputs: Container DataFrame
        Returns:
            None
        """
        super().set_training_data(inputs=inputs)

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """
        Fit model with training data.
        Args:
            *: Container DataFrame. Time series data up to fit.
        Returns:
            None
        """
        return super().fit()

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Process the testing data.
        Args:
            inputs: Container DataFrame. Time series data up to outlier detection.
        Returns:
            Container DataFrame
            1 marks Outliers, 0 marks normal.
        """
        return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)

    def get_params(self) -> Params:
        """
        Return parameters.
        Args:
            None
        Returns:
            class Params
        """
        return super().get_params()

    def set_params(self, *, params: Params) -> None:
        """
        Set parameters for outlier detection.
        Args:
            params: class Params
        Returns:
            None
        """
        super().set_params(params=params)
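
# Usage sketch (an assumption for illustration, not part of this file): with
# the UODBase-backed primitive above, parameters can be round-tripped through
# get_params/set_params:
#
#   params = primitive.get_params()
#   primitive.set_params(params=params)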
@@ -0,0 +1,381 @@
import os
import sklearn
import numpy
import typing
import time
import logging
import uuid

import numpy as np
import pandas as pd
from scipy import sparse
from numpy import ndarray
from collections import OrderedDict
from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple

from common_primitives import dataframe_utils, utils
from d3m import utils  # note: this shadows common_primitives.utils above
from d3m import container
from d3m.base import utils as base_utils
from d3m.exceptions import PrimitiveNotFittedError
from d3m.container import DataFrame as d3m_dataframe
from d3m.container.numpy import ndarray as d3m_ndarray
from d3m.primitive_interfaces import base, transformer
from d3m.metadata import hyperparams, params, base as metadata_base
from d3m.primitive_interfaces.base import CallResult, DockerContainer

import stumpy

__all__ = ('MatrixProfile',)

Inputs = container.DataFrame
Outputs = container.DataFrame


class PrimitiveCount:
    primitive_no = 0


class Hyperparams(hyperparams.Hyperparams):
    window_size = hyperparams.UniformInt(
        lower=0,
        upper=100,  # TODO: define the correct upper bound
        default=50,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="The moving window size used to compute the matrix profile."
    )

    # Keep previous
    dataframe_resource = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Resource ID of a DataFrame to extract if there are multiple tabular resources inside a Dataset and none is a dataset entry point.",
    )
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(2,),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force the primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(0, 1, 3,),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices not to operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in the input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe."
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking, set this to False.",
    )
    return_semantic_type = hyperparams.Enumeration[str](
        values=['https://metadata.datadrivendiscovery.org/types/Attribute',
                'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
    )
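
# A minimal sketch of how these hyperparams are typically materialized
# (mirrors the unit test at the bottom of this diff; `MatrixProfile` is the
# primitive defined below):
#
#   hyperparams_class = MatrixProfile.metadata.get_hyperparams()
#   hyperparams = hyperparams_class.defaults().replace({'window_size': 3})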


class MP:
    """
    This is the class for the matrix profile function.
    """
    def __init__(self, window_size):
        self._window_size = window_size

    def produce(self, data):
        """
        Args:
            data: DataFrame whose columns are the series to transform
        Returns:
            DataFrame with four matrix-profile columns per input column
        """
        transformed_columns = pd.DataFrame()
        for col in data.columns:
            # Each column yields (n - m + 1) rows and four profile columns.
            output = stumpy.stump(data[col], m=self._window_size)
            output = pd.DataFrame(output)
            transformed_columns = pd.concat([transformed_columns, output], axis=1)
        return transformed_columns
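
# Minimal stumpy sketch (assumes a synthetic series, not data from this
# pipeline): stumpy.stump(T, m) returns an array with n - m + 1 rows and the
# four columns described in the class docstring below.
#
#   import numpy as np
#   import stumpy
#   ts = np.random.rand(100)
#   profile = stumpy.stump(ts, m=10)   # shape: (91, 4)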


class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):
    """
    A primitive that performs matrix profile on a DataFrame using the Stumpy package.
    Stumpy documentation: https://stumpy.readthedocs.io/en/latest/index.html

    Parameters
    ----------
    T_A : ndarray
        The time series or sequence for which to compute the matrix profile
    m : int
        Window size
    T_B : ndarray
        The time series or sequence that contain your query subsequences
        of interest. Default is `None` which corresponds to a self-join.
    ignore_trivial : bool
        Set to `True` if this is a self-join. Otherwise, for AB-join, set this
        to `False`. Default is `True`.

    Returns
    -------
    out : ndarray
        The first column consists of the matrix profile, the second column
        consists of the matrix profile indices, the third column consists of
        the left matrix profile indices, and the fourth column consists of
        the right matrix profile indices.
    """

    metadata = metadata_base.PrimitiveMetadata({
        '__author__': "DATA Lab @ Texas A&M University",
        'name': "Matrix Profile",
        #'python_path': 'd3m.primitives.tods.feature_analysis.matrix_profile',
        'python_path': 'd3m.primitives.tods.detection_algorithm.matrix_profile',
        'source': {'name': "DATA Lab @ Texas A&M University", 'contact': 'mailto:khlai037@tamu.edu',
                   'uris': ['https://gitlab.com/lhenry15/tods/-/blob/Yile/anomaly-primitives/anomaly_primitives/MatrixProfile.py']},
        'algorithm_types': [metadata_base.PrimitiveAlgorithmType.MATRIX_PROFILE, ],
        'primitive_family': metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION,
        'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'MatrixProfilePrimitive')),
        'hyperparams_to_tune': ['window_size'],
        'version': '0.0.2',
    })

    def __init__(self, *, hyperparams: Hyperparams) -> None:
        super().__init__(hyperparams=hyperparams)
        self._clf = MP(window_size=hyperparams['window_size'])
        # Give each instance a stable number so generated column names stay unique.
        self.primitiveNo = PrimitiveCount.primitive_no
        PrimitiveCount.primitive_no += 1

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
        """
        Args:
            inputs: Container DataFrame
            timeout: Default
            iterations: Default
        Returns:
            Container DataFrame containing Matrix Profile of selected columns
        """
        # Get columns to fit.
        self._fitted = False
        self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams)
        self._input_column_names = self._training_inputs.columns

        if len(self._training_indices) > 0:
            self._fitted = True
        else:  # pragma: no cover
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")

        if not self._fitted:  # pragma: no cover
            raise PrimitiveNotFittedError("Primitive not fitted.")

        # Restrict to the selected columns only when semantic-type filtering is enabled.
        sk_inputs = inputs
        if self.hyperparams['use_semantic_types']:  # pragma: no cover
            sk_inputs = inputs.iloc[:, self._training_indices]

        output_columns = []
        if len(self._training_indices) > 0:
            sk_output = self._clf.produce(sk_inputs)
            if sparse.issparse(sk_output):  # pragma: no cover
                sk_output = sk_output.toarray()
            outputs = self._wrap_predictions(inputs, sk_output)
            if len(outputs.columns) == len(self._input_column_names):  # pragma: no cover
                outputs.columns = self._input_column_names
            output_columns = [outputs]
        else:  # pragma: no cover
            if self.hyperparams['error_on_no_input']:
                raise RuntimeError("No input columns were selected")
            self.logger.warning("No input columns were selected")

        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
                                             add_index_columns=self.hyperparams['add_index_columns'],
                                             inputs=inputs, column_indices=self._training_indices,
                                             columns_list=output_columns)
        return CallResult(outputs)
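
    # End-to-end sketch (assumes `main` is a d3m container DataFrame, as in
    # the test at the bottom of this diff):
    #
    #   primitive = MatrixProfile(hyperparams=hyperparams)
    #   result = primitive.produce(inputs=main).value   # container DataFrame of profiles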

    def _update_metadata(self, outputs):  # pragma: no cover
        outputs.metadata = outputs.metadata.generate(outputs)

    @classmethod
    def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams):  # pragma: no cover
        """
        Select columns to fit.
        Args:
            inputs: Container DataFrame
            hyperparams: d3m.metadata.hyperparams.Hyperparams
        Returns:
            list
        """
        if not hyperparams['use_semantic_types']:
            return inputs, list(range(len(inputs.columns)))

        inputs_metadata = inputs.metadata

        def can_produce_column(column_index: int) -> bool:
            return cls._can_produce_column(inputs_metadata, column_index, hyperparams)

        columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata,
                                                                                   use_columns=hyperparams['use_columns'],
                                                                                   exclude_columns=hyperparams['exclude_columns'],
                                                                                   can_use_column=can_produce_column)
        # Known issue: when hyperparams['use_columns'] = (2, 3) and
        # hyperparams['exclude_columns'] = (1, 2), columns_to_produce is still [2].
        return inputs.iloc[:, columns_to_produce], columns_to_produce

    @classmethod
    def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool:  # pragma: no cover
        """
        Output whether a column can be processed.
        Args:
            inputs_metadata: d3m.metadata.base.DataMetadata
            column_index: int
        Returns:
            bool
        """
        column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))

        accepted_structural_types = (int, float, np.integer, np.float64)
        accepted_semantic_types = set()
        accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute")

        if not issubclass(column_metadata['structural_type'], accepted_structural_types):
            return False

        semantic_types = set(column_metadata.get('semantic_types', []))
        if len(semantic_types) == 0:
            cls.logger.warning("No semantic types found in column metadata")
            return False

        # Make sure all accepted_semantic_types are present in semantic_types.
        if len(accepted_semantic_types - semantic_types) == 0:
            return True

        return False

    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
        """
        Wrap predictions into a DataFrame.
        Args:
            inputs: Container DataFrame
            predictions: array-like data (n_samples, n_features)
        Returns:
            DataFrame
        """
        outputs = d3m_dataframe(predictions, generate_metadata=True)
        target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams, self.primitiveNo)
        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
        return outputs

    @classmethod
    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
        """
        Update metadata for selected columns.
        Args:
            inputs_metadata: metadata_base.DataMetadata
            outputs: Container DataFrame
            target_columns_metadata: list
        Returns:
            d3m.metadata.base.DataMetadata
        """
        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)

        for column_index, column_metadata in enumerate(target_columns_metadata):
            column_metadata.pop("structural_type", None)
            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)

        return outputs_metadata

    @classmethod
    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo):
        """
        Add target columns metadata.
        Args:
            outputs_metadata: metadata_base.DataMetadata
            hyperparams: d3m.metadata.hyperparams.Hyperparams
        Returns:
            List[OrderedDict]
        """
        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
        target_columns_metadata: List[OrderedDict] = []

        for column_index in range(outputs_length):
            column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index)
            column_metadata = OrderedDict()
            semantic_types = set()
            semantic_types.add(hyperparams["return_semantic_type"])
            column_metadata['semantic_types'] = list(semantic_types)
            column_metadata["name"] = str(column_name)
            target_columns_metadata.append(column_metadata)

        return target_columns_metadata
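
    # Naming sketch: with the format string above, the generated columns of
    # the first instantiated primitive (primitiveNo == 0) are named
    # "Matrix Profile0_0", "Matrix Profile0_1", ... one per output column.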
@@ -58,10 +58,17 @@ class MatrixProfileTest(unittest.TestCase):
        hyperparams_class = MatrixProfilePrimitive.metadata.get_hyperparams()
        hyperparams = hyperparams_class.defaults()
        hyperparams = hyperparams.replace({'window_size': 3})

        primitive = MatrixProfilePrimitive(hyperparams=hyperparams)
        # MatrixProfilePrimitive is a transformer, so no training step is needed:
        # primitive.set_training_data(inputs=main)
        # primitive.fit()
        new_main = primitive.produce(inputs=main).value
        print(new_main)
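
        # A follow-up check one could add here (a sketch, assuming the default
        # 'new' return_result and that stumpy keeps n - m + 1 subsequences per
        # series):
        #
        #   self.assertEqual(new_main.shape[0], main.shape[0] - 3 + 1)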