From a5baa5ed3b8f7820cb550abb4c16ac2ea31693c9 Mon Sep 17 00:00:00 2001
From: Purav Zumkhawala
Date: Mon, 21 Sep 2020 00:58:51 -0500
Subject: [PATCH] Telemanom bug fix

---
 tods/detection_algorithm/Telemanom.py | 718 +++++++++++++++++-----------
 1 file changed, 360 insertions(+), 358 deletions(-)

diff --git a/tods/detection_algorithm/Telemanom.py b/tods/detection_algorithm/Telemanom.py
index 4ffa6ac..7d0732f 100644
--- a/tods/detection_algorithm/Telemanom.py
+++ b/tods/detection_algorithm/Telemanom.py
@@ -27,9 +27,9 @@ from d3m.primitive_interfaces.base import CallResult, DockerContainer
 from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
 from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase
 
-from .UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase
+from detection_algorithm.UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase
 
-from .core.CollectiveBase import CollectiveBaseDetector
+from detection_algorithm.core.CollectiveBase import CollectiveBaseDetector
 
 from sklearn.utils import check_array
@@ -39,9 +39,9 @@ from d3m import exceptions
 
 # from detection_algorithm.UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase
 
-from .core.utils.errors import Errors
-from .core.utils.channel import Channel
-from .core.utils.modeling import Model
+from detection_algorithm.core.utils.errors import Errors
+from detection_algorithm.core.utils.channel import Channel
+from detection_algorithm.core.utils.modeling import Model
 
 # from pyod.models.base import BaseDetector
@@ -53,394 +53,396 @@ Inputs = container.DataFrame
 Outputs = container.DataFrame
 
 
 class Params(Params_ODBase):
-    ######## Add more Attributes #######
+    ######## Add more Attributes #######
 
-    pass
+    pass
 
 
 class Hyperparams(Hyperparams_ODBase):
+
+    smoothing_perc = hyperparams.Hyperparameter[float](
+        default=0.05,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="determines window size used in EWMA smoothing (percentage of total values for channel)"
+    )
-    smoothing_perc = hyperparams.Hyperparameter[float](
-        default=0.05,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="determines window size used in EWMA smoothing (percentage of total values for channel)"
-    )
-
-
-    window_size_ = hyperparams.Hyperparameter[int](
-        default=100,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="number of trailing batches to use in error calculation"
-    )
-    error_buffer = hyperparams.Hyperparameter[int](
-        default=50,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="number of values surrounding an error that are brought into the sequence (promotes grouping on nearby sequences"
-    )
-
-    batch_size = hyperparams.Hyperparameter[int](
-        default=70,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Batch size while predicting"
-    )
+    window_size_ = hyperparams.Hyperparameter[int](
+        default=100,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="number of trailing batches to use in error calculation"
+    )
+    error_buffer = hyperparams.Hyperparameter[int](
+        default=50,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="number of values surrounding an error that are brought into the sequence (promotes grouping of nearby sequences)"
+    )
+
+    batch_size = hyperparams.Hyperparameter[int](
+        default=70,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Batch size while predicting"
+    )
-
-    # LSTM Model Parameters
-    dropout = hyperparams.Hyperparameter[float](
-        default=0.3,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="Dropout rate"
-    )
-    validation_split = hyperparams.Hyperparameter[float](
-        default=0.2,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Validation split"
-    )
+
+    # LSTM Model Parameters
-    optimizer = hyperparams.Hyperparameter[typing.Union[str, None]](
-        default='Adam',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Optimizer"
-    )
+    dropout = hyperparams.Hyperparameter[float](
+        default=0.3,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="Dropout rate"
+    )
-    lstm_batch_size = hyperparams.Hyperparameter[int](
-        default=64,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="lstm model training batch size"
-    )
+    validation_split = hyperparams.Hyperparameter[float](
+        default=0.2,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Validation split"
+    )
+    optimizer = hyperparams.Hyperparameter[typing.Union[str, None]](
+        default='Adam',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Optimizer"
+    )
-    loss_metric = hyperparams.Hyperparameter[typing.Union[str, None]](
-        default='mean_squared_error',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="loss function"
-    )
+    lstm_batch_size = hyperparams.Hyperparameter[int](
+        default=64,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="LSTM model training batch size"
+    )
-    layers = hyperparams.List(
-        elements=hyperparams.Hyperparameter[int](1),
-        default=[10,10],
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="No of units for the 2 lstm layers"
-    )
-    # Training Parameters
+    loss_metric = hyperparams.Hyperparameter[typing.Union[str, None]](
+        default='mean_squared_error',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="loss function"
+    )
-    epochs = hyperparams.Hyperparameter[int](
-        default=1,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="Epoch"
-    )
-    patience = hyperparams.Hyperparameter[int](
-        default=10,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Number of consequetive training iterations to allow without decreasing the val_loss by at least min_delta"
-    )
+    layers = hyperparams.List(
+        elements=hyperparams.Hyperparameter[int](1),
+        default=[10,10],
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="Number of units for the 2 LSTM layers"
+    )
-    min_delta = hyperparams.Hyperparameter[float](
-        default=0.0003,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="Number of consequetive training iterations to allow without decreasing the val_loss by at least min_delta"
-    )
+    # Training Parameters
+    epochs = hyperparams.Hyperparameter[int](
+        default=1,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="Number of training epochs"
+    )
-    l_s = hyperparams.Hyperparameter[int](
-        default=100,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="num previous timesteps provided to model to predict future values"
-    )
+    patience = hyperparams.Hyperparameter[int](
+        default=10,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Number of consecutive training iterations to allow without the val_loss decreasing by at least min_delta"
+    )
-    n_predictions = hyperparams.Hyperparameter[int](
-        default=10,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="number of steps ahead to predict"
-    )
+    min_delta = hyperparams.Hyperparameter[float](
+        default=0.0003,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="Minimum amount by which val_loss must decrease between training iterations to count as an improvement"
+    )
-
-    # Error thresholding parameters
-    # ==================================
-
-    p = hyperparams.Hyperparameter[float](
-        default=0.05,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="minimum percent decrease between max errors in anomalous sequences (used for pruning)"
-    )
-
-    # Contamination
+    l_s = hyperparams.Hyperparameter[int](
+        default=100,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="number of previous timesteps provided to the model to predict future values"
+    )
-    contamination = hyperparams.Uniform(
-        lower=0.,
-        upper=0.5,
-        default=0.1,
-        description='the amount of contamination of the data set, i.e.the proportion of outliers in the data set. Used when fitting to define the threshold on the decision function',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
-    )
+    n_predictions = hyperparams.Hyperparameter[int](
+        default=10,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="number of steps ahead to predict"
+    )
+
+
+    # Error thresholding parameters
+    # ==================================
+
+    p = hyperparams.Hyperparameter[float](
+        default=0.05,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="minimum percent decrease between max errors in anomalous sequences (used for pruning)"
+    )
+
+    # Contamination
+
+    contamination = hyperparams.Uniform(
+        lower=0.,
+        upper=0.5,
+        default=0.1,
+        description='the amount of contamination of the data set, i.e. the proportion of outliers in the data set. Used when fitting to define the threshold on the decision function',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
+    )
 
 
 class TelemanomPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
-    """
-    A primitive that uses telmanom for outlier detection
-
-    Parameters
-    ----------
-
-
-    """
-
-    __author__ = "Data Lab"
-    metadata = metadata_base.PrimitiveMetadata(
-        {
-            '__author__' : "DATA Lab at Texas A&M University",
-            'name': "Telemanom",
-            'python_path': 'd3m.primitives.tods.detection_algorithm.telemanom',
-            'source': {
-                'name': 'DATA Lab at Texas A&M University',
-                'contact': 'mailto:khlai037@tamu.edu',
-                'uris': [
-                    'https://gitlab.com/lhenry15/tods.git',
-                    'https://gitlab.com/lhenry15/tods/-/blob/purav/anomaly-primitives/anomaly_primitives/telemanom.py',
-                ],
-            },
-            'algorithm_types': [
-                metadata_base.PrimitiveAlgorithmType.TELEMANOM,
-            ],
-            'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
-            'id': 'c7259da6-7ce6-42ad-83c6-15238679f5fa',
-            'hyperparameters_to_tune':['layers','loss_metric','optimizer','epochs','p','l_s','patience','min_delta','dropout','smoothing_perc'],
-            'version': '0.0.1',
-        },
-    )
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams, #
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None) -> None:
-
-        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-
-        self._clf = Detector(smoothing_perc=self.hyperparams['smoothing_perc'],
-                             window_size=self.hyperparams['window_size_'],
-                             error_buffer=self.hyperparams['error_buffer'],
-                             batch_size = self.hyperparams['batch_size'],
-                             validation_split = self.hyperparams['validation_split'],
-                             optimizer = self.hyperparams['optimizer'],
-                             lstm_batch_size = self.hyperparams['lstm_batch_size'],
-                             loss_metric = self.hyperparams['loss_metric'],
-                             layers = self.hyperparams['layers'],
-                             epochs = self.hyperparams['epochs'],
-                             patience = self.hyperparams['patience'],
-                             min_delta = self.hyperparams['min_delta'],
-                             l_s = self.hyperparams['l_s'],
-                             n_predictions = self.hyperparams['n_predictions'],
-                             p = self.hyperparams['p'],
-                             contamination=hyperparams['contamination']
-                             )
-
-    def set_training_data(self, *, inputs: Inputs) -> None:
-        """
-        Set training data for outlier detection.
-        Args:
-            inputs: Container DataFrame
-
-        Returns:
-            None
-        """
-        super().set_training_data(inputs=inputs)
-
-    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
-        """
-        Fit model with training data.
-        Args:
-            *: Container DataFrame. Time series data up to fit.
-
-        Returns:
-            None
-        """
-        return super().fit()
-
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
-        """
-        Process the testing data.
-        Args:
-            inputs: Container DataFrame. Time series data up to outlier detection.
-
-        Returns:
-            Container DataFrame
-            1 marks Outliers, 0 marks normal.
-        """
-        return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)
-
-
-    def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
-        """
-        Process the testing data.
-        Args:
-            inputs: Container DataFrame. Time series data up to outlier detection.
-        Returns:
-            Container DataFrame
-            Outlier score of input DataFrame.
-        """
-        return super().produce_score(inputs=inputs, timeout=timeout, iterations=iterations)
-
-
-    def get_params(self) -> Params:
-        """
-        Return parameters.
-        Args:
-            None
-
-        Returns:
-            class Params
-        """
-        return super().get_params()
-
-    def set_params(self, *, params: Params) -> None:
-        """
-        Set parameters for outlier detection.
-        Args:
-            params: class Params
-
-        Returns:
-            None
-        """
-        super().set_params(params=params)
+    """
+    A primitive that uses Telemanom for outlier detection
+
+    Parameters
+    ----------
+
+
+    """
+
+    __author__ = "Data Lab"
+    metadata = metadata_base.PrimitiveMetadata(
+        {
+            '__author__' : "DATA Lab at Texas A&M University",
+            'name': "Telemanom",
+            'python_path': 'd3m.primitives.tods.detection_algorithm.telemanom',
+            'source': {
+                'name': 'DATA Lab at Texas A&M University',
+                'contact': 'mailto:khlai037@tamu.edu',
+                'uris': [
+                    'https://gitlab.com/lhenry15/tods.git',
+                    'https://gitlab.com/lhenry15/tods/-/blob/purav/anomaly-primitives/anomaly_primitives/telemanom.py',
+                ],
+            },
+            'algorithm_types': [
+                metadata_base.PrimitiveAlgorithmType.TELEMANOM,
+            ],
+            'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
+            'id': 'c7259da6-7ce6-42ad-83c6-15238679f5fa',
+            'hyperparameters_to_tune': ['layers','loss_metric','optimizer','epochs','p','l_s','patience','min_delta','dropout','smoothing_perc'],
+            'version': '0.0.1',
+        },
+    )
+
+    def __init__(self, *,
+                 hyperparams: Hyperparams,
+                 random_seed: int = 0,
+                 docker_containers: Dict[str, DockerContainer] = None) -> None:
+
+        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
+
+        self._clf = Detector(smoothing_perc=self.hyperparams['smoothing_perc'],
+                             window_size=self.hyperparams['window_size_'],
+                             error_buffer=self.hyperparams['error_buffer'],
+                             batch_size=self.hyperparams['batch_size'],
+                             validation_split=self.hyperparams['validation_split'],
+                             optimizer=self.hyperparams['optimizer'],
+                             lstm_batch_size=self.hyperparams['lstm_batch_size'],
+                             loss_metric=self.hyperparams['loss_metric'],
+                             layers=self.hyperparams['layers'],
+                             epochs=self.hyperparams['epochs'],
+                             patience=self.hyperparams['patience'],
+                             min_delta=self.hyperparams['min_delta'],
+                             l_s=self.hyperparams['l_s'],
+                             n_predictions=self.hyperparams['n_predictions'],
+                             p=self.hyperparams['p'],
+                             contamination=self.hyperparams['contamination']
+                             )
+
+    def set_training_data(self, *, inputs: Inputs) -> None:
+        """
+        Set training data for outlier detection.
+        Args:
+            inputs: Container DataFrame
+
+        Returns:
+            None
+        """
+        super().set_training_data(inputs=inputs)
+
+    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
+        """
+        Fit model with training data.
+        Args:
+            None. Uses the time series data supplied via set_training_data.
+
+        Returns:
+            None
+        """
+        return super().fit()
+
+    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+        """
+        Process the testing data.
+        Args:
+            inputs: Container DataFrame. Time series data to run outlier detection on.
+
+        Returns:
+            Container DataFrame
+            1 marks Outliers, 0 marks normal.
+        """
+        return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)
+
+
+    def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+        """
+        Process the testing data.
+        Args:
+            inputs: Container DataFrame. Time series data to run outlier detection on.
+
+        Returns:
+            Container DataFrame
+            Outlier score of input DataFrame.
+        """
+        return super().produce_score(inputs=inputs, timeout=timeout, iterations=iterations)
+
+
+    def get_params(self) -> Params:
+        """
+        Return parameters.
+        Args:
+            None
+
+        Returns:
+            class Params
+        """
+        return super().get_params()
+
+    def set_params(self, *, params: Params) -> None:
+        """
+        Set parameters for outlier detection.
+        Args:
+            params: class Params
+
+        Returns:
+            None
+        """
+        super().set_params(params=params)
 
 
 class Detector(CollectiveBaseDetector):
-    """Class to Implement Deep Log LSTM based on "https://www.cs.utah.edu/~lifeifei/papers/deeplog.pdf
-    Only Parameter Value anomaly detection layer has been implemented for time series data"""
-
-    def __init__(self,smoothing_perc=0.05,window_size = 10,error_buffer = 5,batch_size =30, \
-        dropout = 0.3, validation_split=0.2,optimizer='adam',lstm_batch_size=64,loss_metric='mean_squared_error', \
-        layers=[40,40],epochs = 1,patience =10,min_delta=0.0003,l_s=5,n_predictions=2,p = 0.05,contamination=0.1):
-
-        # super(Detector, self).__init__(contamination=contamination)
-        super(Detector, self).__init__(contamination=contamination,
-                                       window_size=l_s,
-                                       step_size=1,
-                                       )
-
-        self._smoothin_perc = smoothing_perc
-        self._window_size =window_size
-        self._error_buffer = error_buffer
-        self._batch_size = batch_size
-        self._dropout = dropout
-        self._validation_split = validation_split
-        self._optimizer = optimizer
-        self._lstm_batch_size = lstm_batch_size
-        self._loss_metric = loss_metric
-        self._layers = layers
-        self._epochs = epochs
-        self._patience = patience
-        self._min_delta = min_delta
-        self._l_s = l_s
-        self._n_predictions = n_predictions
-        self._p = p
-        self.contamination = contamination
-
-        # self.y_hat = None
-        self.results = []
-        self.result_df = None
-
-        self._model = None
-        self._channel = None
-
-
-    def fit(self,X,y=None):
-        """
-        Fit data to LSTM model.
-        Args:
-            inputs : X , ndarray of size (number of sample,features)
-
-        Returns:
-            return : self object with trained model
-        """
-        X = check_array(X).astype(np.float)
-        self._set_n_classes(None)
-
-        inputs = X
-        self._channel = Channel(n_predictions = self._n_predictions,l_s = self._l_s)
-        self._channel.shape_train_data(inputs)
-
-        self._model = Model(self._channel,patience = self._patience,
-                            min_delta =self._min_delta,
-                            layers = self._layers,
-                            dropout = self._dropout,
-                            n_predictions = self._n_predictions,
-                            loss_metric = self._loss_metric,
-                            optimizer = self._optimizer,
-                            lstm_batch_size = self._lstm_batch_size,
-                            epochs = self._epochs,
-                            validation_split = self._validation_split,
-                            batch_size = self._batch_size,
-                            l_s = self._l_s
-                            )
-
-        self.decision_scores_, self.left_inds_, self.right_inds_ = self.decision_function(X)
-        self._process_decision_scores()
-
-        return self
-
-
-
-    def decision_function(self, X: np.array):
-        """Predict raw anomaly scores of X using the fitted detector.
-
-        The anomaly score of an input sample is computed based on the fitted
-        detector. For consistency, outliers are assigned with
-        higher anomaly scores.
-
-        Parameters
-        ----------
-        X : numpy array of shape (n_samples, n_features)
-            The input samples. Sparse matrices are accepted only
-            if they are supported by the base estimator.
-
-        Returns
-        -------
-        anomaly_scores : numpy array of shape (n_samples,)
-            The anomaly score of the input samples.
- """ - - X = check_array(X).astype(np.float) - self._set_n_classes(None) - - inputs = X - self._channel.shape_test_data(inputs) - self._channel = self._model.batch_predict(channel = self._channel) - - errors = Errors(channel = self._channel, - window_size = self._window_size, - batch_size = self._batch_size, - smoothing_perc = self._smoothin_perc, - n_predictions = self._n_predictions, - l_s = self._l_s, - error_buffer = self._error_buffer, - p = self._p - ) - - # prediciton smoothed error - prediction_errors = np.reshape(errors.e_s,(self._channel.X_test.shape[0],self._channel.X_test.shape[2])) - prediction_errors = np.sum(prediction_errors,axis=1) - - left_indices = [] - right_indices = [] - scores = [] - for i in range(len(prediction_errors)): - left_indices.append(i) - right_indices.append(i+self._l_s) - scores.append(prediction_errors[i]) - - - - return np.asarray(scores),np.asarray(left_indices),np.asarray(right_indices) + """Class to Implement Deep Log LSTM based on "https://www.cs.utah.edu/~lifeifei/papers/deeplog.pdf + Only Parameter Value anomaly detection layer has been implemented for time series data""" + + def __init__(self,smoothing_perc=0.05,window_size = 10,error_buffer = 5,batch_size =30, \ + dropout = 0.3, validation_split=0.2,optimizer='adam',lstm_batch_size=64,loss_metric='mean_squared_error', \ + layers=[40,40],epochs = 1,patience =10,min_delta=0.0003,l_s=5,n_predictions=2,p = 0.05,contamination=0.1): + + # super(Detector, self).__init__(contamination=contamination) + super(Detector, self).__init__(contamination=contamination, + window_size=l_s, + step_size=1, + ) + + self._smoothin_perc = smoothing_perc + self._window_size =window_size + self._error_buffer = error_buffer + self._batch_size = batch_size + self._dropout = dropout + self._validation_split = validation_split + self._optimizer = optimizer + self._lstm_batch_size = lstm_batch_size + self._loss_metric = loss_metric + self._layers = layers + self._epochs = epochs + self._patience = patience + self._min_delta = min_delta + self._l_s = l_s + self._n_predictions = n_predictions + self._p = p + self.contamination = contamination + + # self.y_hat = None + self.results = [] + self.result_df = None + + self._model = None + self._channel = None + + + def fit(self,X,y=None): + """ + Fit data to LSTM model. + Args: + inputs : X , ndarray of size (number of sample,features) + + Returns: + return : self object with trained model + """ + X = check_array(X).astype(np.float) + self._set_n_classes(None) + + inputs = X + self._channel = Channel(n_predictions = self._n_predictions,l_s = self._l_s) + self._channel.shape_train_data(inputs) + + self._model = Model(self._channel,patience = self._patience, + min_delta =self._min_delta, + layers = self._layers, + dropout = self._dropout, + n_predictions = self._n_predictions, + loss_metric = self._loss_metric, + optimizer = self._optimizer, + lstm_batch_size = self._lstm_batch_size, + epochs = self._epochs, + validation_split = self._validation_split, + batch_size = self._batch_size, + l_s = self._l_s + ) + + self.decision_scores_, self.left_inds_, self.right_inds_ = self.decision_function(X) + self._process_decision_scores() + + return self + + + + def decision_function(self, X: np.array): + """Predict raw anomaly scores of X using the fitted detector. + + The anomaly score of an input sample is computed based on the fitted + detector. For consistency, outliers are assigned with + higher anomaly scores. 
+
+        Parameters
+        ----------
+        X : numpy array of shape (n_samples, n_features)
+            The input samples. Sparse matrices are accepted only
+            if they are supported by the base estimator.
+
+        Returns
+        -------
+        anomaly_scores : numpy array of shape (n_samples,)
+            The anomaly score of the input samples.
+        """
+
+        X = check_array(X).astype(np.float)
+        self._set_n_classes(None)
+
+        inputs = X
+        self._channel.shape_test_data(inputs)
+        self._channel = self._model.batch_predict(channel=self._channel)
+
+        errors = Errors(channel=self._channel,
+                        window_size=self._window_size,
+                        batch_size=self._batch_size,
+                        smoothing_perc=self._smoothing_perc,
+                        n_predictions=self._n_predictions,
+                        l_s=self._l_s,
+                        error_buffer=self._error_buffer,
+                        p=self._p
+                        )
+
+        # prediction smoothed error
+        prediction_errors = np.reshape(errors.e_s, (self._channel.X_test.shape[0], self._channel.X_test.shape[2]))
+        prediction_errors = np.sum(prediction_errors, axis=1)
+
+        left_indices = []
+        right_indices = []
+        scores = []
+        for i in range(len(prediction_errors)):
+            left_indices.append(i)
+            right_indices.append(i + self._l_s)
+            scores.append(prediction_errors[i])
+
+        return np.asarray(scores), np.asarray(left_indices), np.asarray(right_indices)
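
For reference, a minimal usage sketch of the patched Detector follows (it is not part of the patch itself). It assumes the tods/ source root is on PYTHONPATH so that the absolute detection_algorithm imports introduced by this patch resolve; the synthetic signal, window sizes, and epoch count are illustrative values chosen for this sketch, not defaults taken from the primitive.

    # Minimal usage sketch, assuming `tods/` is on PYTHONPATH so the absolute
    # `detection_algorithm` imports introduced by this patch resolve.
    import numpy as np
    from detection_algorithm.Telemanom import Detector

    # Synthetic univariate series with an injected anomaly (illustrative only).
    X = np.sin(np.linspace(0, 20 * np.pi, 1000)).reshape(-1, 1)
    X[700:710] += 5.0

    det = Detector(l_s=50, n_predictions=10, epochs=2, contamination=0.01)
    det.fit(X)  # shapes the Channel, trains the LSTM, computes decision scores

    # Raw scores plus the [left, right] window indices each score covers; the
    # score vector is shorter than X because the first l_s + n_predictions
    # values are consumed as model input.
    scores, left, right = det.decision_function(X)
    print(scores.shape, left[0], right[0])

    # After fit, the pyod-style base class is expected to expose binary labels
    # derived from the contamination threshold, e.g. det.labels_.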