From 29c3d7451d0ffc7ffabf583b000512d5a8d143c2 Mon Sep 17 00:00:00 2001 From: YileAllenChen1 Date: Mon, 28 Sep 2020 22:37:03 -0500 Subject: [PATCH] fix AC and TIT test Former-commit-id: a9109fc8ed43cba07b47efe154310d5ae7947be0 [formerly 215a44a04907087b2d75af3f5a1536c811249a73] [formerly 2cce6c8581f5a23c9a12f983c208713afca161a6 [formerly e2ae5f6c306ff404ac34ae6b7b4c63a97dde926c]] [formerly 28d35dc45383e8dc91c7b168e2754c49ce7503da [formerly 8eaa951a68f2584e00e74842becd468a66b47375] [formerly 6e28f8b832468f8d862bae91bafaad1d277016cf [formerly 286e1a5fbcc1f7a7868d67f20651c48d2bf05d37]]] [formerly 5d224b535fd17c765ce49f9d84dff3e816be0757 [formerly bbdb89e515b63765d7c630a7eec1bff326b74075] [formerly d6fa965de136273004edb491d8b9998f63c24e4e [formerly 75c69c2559a7d6f69fa3e0c93ccbbd82c5f6b7cb]] [formerly f322227a9816574465fe44bcc9c719d635f2011d [formerly 55c4977ca4ef2ae31749a8f93aad5e6e5d6b4282] [formerly 3a3c86d6575c38ad83a65df504b127c08c066d53 [formerly de354539d1f3a9794ecfc06fbc841792d0877e50]]]] [formerly 9bc24fdc7a399fa22a3d068487852ce0ef7ada96 [formerly 9f4fd781bf003237af4e72a2550402968bee85aa] [formerly 568e27b0cb90780200ba7f9714f97861e1c3a8ed [formerly f9150d1285ebec1fce890dadd22ddd865cc57cf0]] [formerly d0c59a3d395784b7ee545095cfd5bb92f0e77788 [formerly d96aa22b75d7798978c1ed2926b92622a2342f20] [formerly 3a14c1238630265488750c673172afdee2fb89c8 [formerly 5565f029557c5cf0c40d0befcd0391ff718b9634]]] [formerly 44818c7375ad93b2733d2aee70c20de199c8ddfe [formerly 483ebc15e4bd2cc4b781272ec7e66a5b190fba35] [formerly af5eec3d2a770d35fdd1c717a048f6469c8a4e1a [formerly 4b55575e93946aa19733e0ac46903504fc742bb7]] [formerly 442756f4654c2888bc90ab4837d29afdd9e5aca3 [formerly 28a50772f59993f26635c9b64fab91c8d95630c7] [formerly 0c43624c5086192351ed78d7a0f20412878037b5 [formerly 7d0997018106f9a9f6bce40997a0f4635b8073e3]]]]] [formerly 9218208fdc9988569cc300a9cd90ea49bb10bbb7 [formerly 2f4106fbd8da43f93ddcf6a62124e406097eaa57] [formerly 40dedeb4ba2daf51d06b15de5c682a8032accc03 [formerly 01da1d9951ef8fed9146dc3057771bb7601e3e8b]] [formerly ea3c8baca8790c0cfd3901ccb3120f0ee19e9663 [formerly 85aa8bbafece7d3255632986859b9573886c0bef] [formerly 0a94840cc0593deea97002d619a8146859f30465 [formerly 45bf7748c77121324e70b8f42b9514645d84e7ee]]] [formerly 4d11b25571a29068385192beded73728aafbef2f [formerly 4fdd88dbcbee6f671580ad2b052be2573664e002] [formerly 186fc780be4d518c3719d54b044c828bdc6d5dfa [formerly bb0c48712bd32e6b711657dff3b030559d725f6b]] [formerly 433c94b45d0a3e9fde99f972c51bd878acb0612d [formerly 6ea63132493894df487ad57f1abee4b92dedc4db] [formerly 319bea5e873f8d2aa0865f288de1a02ae38eebbb [formerly 29ac7fa939e10d6333675c962225f43395a981ea]]]] [formerly 13410f823fcbc0b5206778c3d811c15f07093301 [formerly 77a09d7431a002a0d37ae98b81bb2a4f3dc5037c] [formerly e879b986c53f022cb00c2304965c8a04e2cc6c18 [formerly 023b7b3bb4a75dc5afa4ab96af4ea0eec4f1ba07]] [formerly d6a225917aecfa0571b64fcd83977900cc21a9c9 [formerly 20433a6a34f655f4d3aa6f235845c906d855835f] [formerly ceb8aa0048092208c5a835ceebb5cb96340e4fc7 [formerly 4560220dacc66156943b3511d29f9b774f6816a7]]] [formerly 32f3541b975927a75211c3a497ae29079f67f769 [formerly f59a7ce2e28335d624216c630b969b3badf91c7d] [formerly cea2e3dc6cfc4b2b3a983df3d5dda340a1a48644 [formerly 5f45f30c001532488aa49dd8bb26c0634c52f0a4]] [formerly 5c3f0347b845fa0de7c093ae583ed2c6832c9307 [formerly bbed88198294aeaa647ccdc581b82959ef09de22] [formerly 7dc0ec0332e6e30e3f11cdfdb93d56d6a884df7c [formerly e8206694e116f0205878a4b691a062bf0cc210e8]]]]]] Former-commit-id: 16b81cc24d89cf55a07d067efaa0b9457a2e56d6 [formerly 241a03fd2f8af1ebf8b41394077d4808edf4d7d5] [formerly ddef9c0ab0d85452cb4f912b6e8c94eb0a3aeb86 [formerly 6d9ff8081a0b206430c16e3a14f0113a461cec9d]] [formerly 7562094b19caefdcda146e53dcf180408bbac394 [formerly a2a85b0f5847e6d933cde5676698a94b5e87aa6e] [formerly ce262c75d75899d084caff7834715dbeaf418e8c [formerly ac4c7e5aa26fe1ee93f170d5267a888268372f1f]]] [formerly 2204112a93d4a6f15ba455e01bba4b3df96d3a54 [formerly 0f1dc0b2e83e2541a2b9ddcb5714e4f38384539e] [formerly 4ad00dbd8d352a36780157d74d5fce283da804e2 [formerly 7bb6bd6b0cc4dda7fe9dcbe285828228a8cea5b9]] [formerly c6be710c7bed1359e019c6890550507b35ea0fb5 [formerly 6de8e751a69567d44810292117c67cf078476372] [formerly 72d778944b893e23fdc22546f639f9c2c6a6e55b [formerly bb71b7be3429e30015e586689cd07e0d89aa8eb4]]]] [formerly 079fb0fd9d60ed056ac55409b9dc43bb9949090d [formerly 0e29488246f7e6352b613fdaa8e093f3a3258d6f] [formerly 205d775c2be57f287a6bad818ad729e4621474a1 [formerly ff378c71a2a31fee969783baaacdd6ceb3599cbe]] [formerly 4665223bc9b9254509e6fb8cecf74070d2c244e4 [formerly dbf451bc32ed29ff8ec555b09027e3100f82dacc] [formerly 8b15af4b334aca1dd7b76f55791657d8b3a72c2f [formerly ba3156b895a2c00e6baaeb2e423d0c3eec2783f0]]] [formerly b68f43143e98307ac97ceda320d9d997c0a26aaf [formerly 21d6f48108c1f689eb4a0e6bfe70a804011dec43] [formerly 53fcdc7f9f126f99a9c424567a75507b9596bd28 [formerly 83ea20396fbc9928c364c6fb1975864f40da718b]] [formerly e1394c337d95eb9d9f923109dfba57668a2ec079 [formerly 8caba2392e97a9b02c4a791efd96227de5006d06] [formerly 7dc0ec0332e6e30e3f11cdfdb93d56d6a884df7c]]]] Former-commit-id: 353661cfe97a611a348b205ae8756cde583e9408 [formerly 319bc1a772ac3a9307ea5bf262ee17df8ab4d0e1] [formerly 21c0284b47d716d3a864a97f4295971081953f48 [formerly 48ecf2dac873479b697738db810dff8fcbb62865]] [formerly 7195cd4cd2b26392eb50578d3396bad5c405bd7e [formerly 91d85dca707d30f3dfb6d366a208030fc1d5d4f2] [formerly 5071952208ea6efb460947033c2b73f71a5b315e [formerly 3939d7fde830c54f90a4519fff05a706b72cda64]]] [formerly 03dd1b056212f1dcf8aaedc6ced3a5c581d9e991 [formerly ffa4e94c911123e0503c2c872db8db9284672b78] [formerly 6daebd02db7aa8ceefd36e8cd44ee5730e856c5f [formerly f66beb1013ab93eb249a18a9a019cec1d2e047ce]] [formerly bf63fc220be3c89e9b683c567188c8d58ed8609b [formerly bcb7c40277a7914048f24d8c5ba23889a31ac70d] [formerly 5249167cbccc6d9b4d34a00b7125dde7b2b0add6 [formerly 14655c50714a8c89cf1ece85a9d36455c824c477]]]] Former-commit-id: 86aa2c1419dce06c304df6f72989a3d269cd89e0 [formerly 08743e769135c8d66c597545a8c255fca251be25] [formerly 03a36b9f230255daefe1b6675ad1f10cdaad9808 [formerly 02ad0e57eda647aca35aff7d9d3a39c9c713b7ed]] [formerly 55587d533d5c29cca912ed35f5d4ea11595098bf [formerly 7a431da27719c96e76cf17d6fdd34126ded55f0d] [formerly dcb48102b25450cdad9a3b66610d13126d2d4ca8 [formerly cc68e7ad3f4354dc422f81fd4bf6d904005f037f]]] Former-commit-id: eb3a9adac6de8d1b8a8e64ba397856e3ad82fc48 [formerly e004b9c0b395fc786bb2f01d8a155b285691db90] [formerly 942c83a7b6594b144ea97210023d62ceed44b74d [formerly dfa340a476365eac9b083508b68b531456b3d52c]] Former-commit-id: 42d3de0a0c3ae596033f80dad93816e5370f6aae [formerly b69d33468dcb95147b5f54f7ae023bc0e70b96a7] Former-commit-id: 7c37a2c00d2d73b13eb8af6f423248e158f6caf4 --- tods/detection_algorithm/MP.py | 189 +++++++++++++++ tods/detection_algorithm/MatrixProfile.py | 377 ++++++------------------------ tods/tests/test_Autocorrelation.py | 2 +- tods/tests/test_TimeIntervalTransform.py | 2 +- 4 files changed, 264 insertions(+), 306 deletions(-) create mode 100644 tods/detection_algorithm/MP.py diff --git a/tods/detection_algorithm/MP.py b/tods/detection_algorithm/MP.py new file mode 100644 index 0000000..3f08509 --- /dev/null +++ b/tods/detection_algorithm/MP.py @@ -0,0 +1,189 @@ +from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple +from numpy import ndarray +from collections import OrderedDict +from scipy import sparse +import os +import sklearn +import numpy +import typing + +# Custom import commands if any +import warnings +import numpy as np +from sklearn.utils import check_array +from sklearn.exceptions import NotFittedError +# from numba import njit +from pyod.utils.utility import argmaxn + +from d3m.container.numpy import ndarray as d3m_ndarray +from d3m.container import DataFrame as d3m_dataframe +from d3m.metadata import hyperparams, params, base as metadata_base +from d3m import utils +from d3m.base import utils as base_utils +from d3m.exceptions import PrimitiveNotFittedError +from d3m.primitive_interfaces.base import CallResult, DockerContainer + +# from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase +from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase +from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase + +from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin +from d3m import exceptions +import pandas +import uuid + +from d3m import container, utils as d3m_utils + +from .UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase +import stumpy +# from typing import Union + +Inputs = d3m_dataframe +Outputs = d3m_dataframe + + + +class Params(Params_ODBase): + ######## Add more Attributes ####### + pass + + +class Hyperparams(Hyperparams_ODBase): + ######## Add more Attributes ####### + pass + +class MP: + """ + This is the class for matrix profile function + """ + def __init__(self, window_size): + self._window_size = window_size + return + + def produce(self, data): + + """ + + Args: + data: dataframe column + Returns: + nparray + + """ + transformed_columns=utils.pandas.DataFrame() + #transformed_columns=d3m_dataframe + for col in data.columns: + output = stumpy.stump(data[col], m = self._window_size) + output = pd.DataFrame(output) + #print("output", output) + transformed_columns=pd.concat([transformed_columns,output],axis=1) + #transformed_columns[col]=output + #print(transformed_columns) + return transformed_columns + +class MatrixProfile(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]): + """ + + A primitive that performs matrix profile on a DataFrame using Stumpy package + Stumpy documentation: https://stumpy.readthedocs.io/en/latest/index.html + + Parameters + ---------- + T_A : ndarray + The time series or sequence for which to compute the matrix profile + m : int + Window size + T_B : ndarray + The time series or sequence that contain your query subsequences + of interest. Default is `None` which corresponds to a self-join. + ignore_trivial : bool + Set to `True` if this is a self-join. Otherwise, for AB-join, set this + to `False`. Default is `True`. + Returnsfdsf + ------- + out : ndarray + The first column consists of the matrix profile, the second column + consists of the matrix profile indices, the third column consists of + the left matrix profile indices, and the fourth column consists of + the right matrix profile indices. + + """ + + metadata = metadata_base.PrimitiveMetadata({ + '__author__': "DATA Lab @Texas A&M University", + 'name': "Matrix Profile", + #'python_path': 'd3m.primitives.tods.feature_analysis.matrix_profile', + 'python_path': 'd3m.primitives.tods.detection_algorithm.matrix_profile', + 'source': {'name': "DATALAB @Taxes A&M University", 'contact': 'mailto:khlai037@tamu.edu', + 'uris': ['https://gitlab.com/lhenry15/tods/-/blob/Yile/anomaly-primitives/anomaly_primitives/MatrixProfile.py']}, + 'algorithm_types': [metadata_base.PrimitiveAlgorithmType.MATRIX_PROFILE,], + 'primitive_family': metadata_base.PrimitiveFamily.FEATURE_CONSTRUCTION, + 'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'MatrixProfilePrimitive')), + 'hyperparams_to_tune': ['window_size'], + 'version': '0.0.2', + }) + + + def __init__(self, *, + hyperparams: Hyperparams, # + random_seed: int = 0, + docker_containers: Dict[str, DockerContainer] = None) -> None: + super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) + + self._clf = MP(window_size=hyperparams['window_size']) + + def set_training_data(self, *, inputs: Inputs) -> None: + """ + Set training data for outlier detection. + Args: + inputs: Container DataFrame + + Returns: + None + """ + super().set_training_data(inputs=inputs) + + def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: + """ + Fit model with training data. + Args: + *: Container DataFrame. Time series data up to fit. + + Returns: + None + """ + return super().fit() + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: + """ + Process the testing data. + Args: + inputs: Container DataFrame. Time series data up to outlier detection. + + Returns: + Container DataFrame + 1 marks Outliers, 0 marks normal. + """ + return super().produce(inputs=inputs, timeout=timeout, iterations=iterations) + + def get_params(self) -> Params: + """ + Return parameters. + Args: + None + + Returns: + class Params + """ + return super().get_params() + + def set_params(self, *, params: Params) -> None: + """ + Set parameters for outlier detection. + Args: + params: class Params + + Returns: + None + """ + super().set_params(params=params) diff --git a/tods/detection_algorithm/MatrixProfile.py b/tods/detection_algorithm/MatrixProfile.py index 93d9ef3..da50b67 100644 --- a/tods/detection_algorithm/MatrixProfile.py +++ b/tods/detection_algorithm/MatrixProfile.py @@ -1,99 +1,56 @@ +from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple +from numpy import ndarray +from collections import OrderedDict +from scipy import sparse import os import sklearn import numpy import typing -import time -from scipy import sparse -from numpy import ndarray -from collections import OrderedDict -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple +# Custom import commands if any +import warnings import numpy as np -import pandas as pd -import logging, uuid -from scipy import sparse -from numpy import ndarray -from collections import OrderedDict -from common_primitives import dataframe_utils, utils +from sklearn.utils import check_array +from sklearn.exceptions import NotFittedError +# from numba import njit +from pyod.utils.utility import argmaxn +from d3m.container.numpy import ndarray as d3m_ndarray +from d3m.container import DataFrame as d3m_dataframe +from d3m.metadata import hyperparams, params, base as metadata_base from d3m import utils -from d3m import container from d3m.base import utils as base_utils from d3m.exceptions import PrimitiveNotFittedError -from d3m.container import DataFrame as d3m_dataframe -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.primitive_interfaces import base, transformer -from d3m.metadata import base as metadata_base, hyperparams -from d3m.metadata import hyperparams, params, base as metadata_base from d3m.primitive_interfaces.base import CallResult, DockerContainer +# from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase +from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase +from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase + +from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin +from d3m import exceptions +import pandas +import uuid + +from d3m import container, utils as d3m_utils + +from .UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase import stumpy +# from typing import Union -__all__ = ('MatrixProfile',) +Inputs = d3m_dataframe +Outputs = d3m_dataframe -Inputs = container.DataFrame -Outputs = container.DataFrame -class PrimitiveCount: - primitive_no = 0 +class Params(Params_ODBase): + ######## Add more Attributes ####### + pass -class Hyperparams(hyperparams.Hyperparams): - window_size = hyperparams.UniformInt( - lower = 0, - upper = 100, #TODO: Define the correct the upper bound - default=50, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="window size to calculate" - ) - - # Keep previous - dataframe_resource = hyperparams.Hyperparameter[typing.Union[str, None]]( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Resource ID of a DataFrame to extract if there are multiple tabular resources inside a Dataset and none is a dataset entry point.", - ) - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(2,), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(0,1,3,), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) +class Hyperparams(Hyperparams_ODBase): + ######## Add more Attributes ####### + pass class MP: """ @@ -123,9 +80,10 @@ class MP: #transformed_columns[col]=output #print(transformed_columns) return transformed_columns - -class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + +class MatrixProfile(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]): """ + A primitive that performs matrix profile on a DataFrame using Stumpy package Stumpy documentation: https://stumpy.readthedocs.io/en/latest/index.html @@ -141,7 +99,7 @@ class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp ignore_trivial : bool Set to `True` if this is a self-join. Otherwise, for AB-join, set this to `False`. Default is `True`. - Returns + Returnsfdsf ------- out : ndarray The first column consists of the matrix profile, the second column @@ -151,7 +109,6 @@ class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp """ - metadata = metadata_base.PrimitiveMetadata({ '__author__': "DATA Lab @Texas A&M University", 'name': "Matrix Profile", @@ -167,254 +124,66 @@ class MatrixProfile(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperp }) - def __init__(self, *, hyperparams: Hyperparams) -> None: - super().__init__(hyperparams=hyperparams) - self._clf = MP(window_size = hyperparams['window_size']) - self.primitiveNo = PrimitiveCount.primitive_no - PrimitiveCount.primitive_no+=1 + def __init__(self, *, + hyperparams: Hyperparams, # + random_seed: int = 0, + docker_containers: Dict[str, DockerContainer] = None) -> None: + super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + self._clf = MP(window_size=hyperparams['window_size']) + def set_training_data(self, *, inputs: Inputs) -> None: """ - + Set training data for outlier detection. Args: - inputs: Container DataFrame - timeout: Default - - iterations: Default - Returns: - - Container DataFrame containing Matrix Profile of selected columns - - """ - - # Get cols to fit. - self._fitted = False - self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - - if len(self._training_indices) > 0: - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.produce(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - #print(outputs) - #CallResult(outputs) - #print("___") - print(outputs.columns) - #outputs.columns = [str(x) for x in outputs.columns] - - return CallResult(outputs) - - # assert isinstance(inputs, container.DataFrame), type(container.DataFrame) - # _, self._columns_to_produce = self._get_columns_to_fit(inputs, self.hyperparams) - - # #print("columns_to_produce ", self._columns_to_produce) - - # outputs = inputs - # if len(self._columns_to_produce) > 0: - # for col in self.hyperparams['use_columns']: - # output = self._clf.produce(inputs.iloc[ : ,col]) - - # outputs = pd.concat((outputs, pd.DataFrame({inputs.columns[col]+'_matrix_profile': output[:,0], - # inputs.columns[col]+'_matrix_profile_indices': output[:,1], - # inputs.columns[col]+'_left_matrix_profile_indices': output[:,2], - # inputs.columns[col]+'_right_matrix_profile_indices': output[:,3]})), axis = 1) - - # else: - # if self.hyperparams['error_on_no_input']: - # raise RuntimeError("No input columns were selected") - # self.logger.warn("No input columns were selected") - - # #print(outputs) - # self._update_metadata(outputs) - - # return base.CallResult(outputs) - - - - def _update_metadata(self, outputs): - outputs.metadata = outputs.metadata.generate(outputs) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - - """ - - Select columns to fit. - Args: - inputs: Container DataFrame - hyperparams: d3m.metadata.hyperparams.Hyperparams - - Returns: - list - + None """ + super().set_training_data(inputs=inputs) - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - - + def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: """ - Encountered error: when hyperparams['use_columns'] = (2,3) and hyperparams['exclude_columns'] is (1,2) - columns_to_produce is still [2] - """ - return inputs.iloc[:, columns_to_produce], columns_to_produce - - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: + Fit model with training data. + Args: + *: Container DataFrame. Time series data up to fit. + Returns: + None """ + return super().fit() - Output whether a column can be processed. - Args: - inputs_metadata: d3m.metadata.base.DataMetadata - column_index: int - - Returns: - bool - + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: """ - - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, np.integer, np.float64) #changed numpy to np - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - - # print(column_metadata) - # print(column_metadata['structural_type'], accepted_structural_types) - - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - # print(column_metadata) - # print(semantic_types, accepted_semantic_types) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - - """ - - Wrap predictions into dataframe + Process the testing data. Args: - inputs: Container Dataframe - predictions: array-like data (n_samples, n_features) + inputs: Container DataFrame. Time series data up to outlier detection. Returns: - Dataframe - + Container DataFrame + 1 marks Outliers, 0 marks normal. """ + return super().produce(inputs=inputs, timeout=timeout, iterations=iterations) - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams, self.primitiveNo) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - + def get_params(self) -> Params: """ + Return parameters. + Args: + None - Updata metadata for selected columns. - Args: - inputs_metadata: metadata_base.DataMetadata - outputs: Container Dataframe - target_columns_metadata: list - - Returns: - d3m.metadata.base.DataMetadata - + Returns: + class Params """ + return super().get_params() - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo): + def set_params(self, *, params: Params) -> None: """ - Add target columns metadata + Set parameters for outlier detection. Args: - outputs_metadata: metadata.base.DataMetadata - hyperparams: d3m.metadata.hyperparams.Hyperparams + params: class Params Returns: - List[OrderedDict] + None """ - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index) - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - return target_columns_metadata + super().set_params(params=params) diff --git a/tods/tests/test_Autocorrelation.py b/tods/tests/test_Autocorrelation.py index bc82ff3..766743c 100644 --- a/tods/tests/test_Autocorrelation.py +++ b/tods/tests/test_Autocorrelation.py @@ -6,7 +6,7 @@ from datetime import datetime from d3m import container, utils from d3m.metadata import base as metadata_base -from feature_analysis import AutoCorrelation +from tods.feature_analysis import AutoCorrelation #import utils as test_utils diff --git a/tods/tests/test_TimeIntervalTransform.py b/tods/tests/test_TimeIntervalTransform.py index 62a48ce..84caab2 100644 --- a/tods/tests/test_TimeIntervalTransform.py +++ b/tods/tests/test_TimeIntervalTransform.py @@ -6,7 +6,7 @@ from datetime import datetime from d3m import container, utils from d3m.metadata import base as metadata_base -from data_processing import TimeIntervalTransform +from tods.data_processing import TimeIntervalTransform #import utils as test_utils