From 9d5a8f950e15fa9263fd015b6f02b850bfc5d433 Mon Sep 17 00:00:00 2001
From: Purav Zumkhawala
Date: Tue, 22 Sep 2020 01:57:30 -0500
Subject: [PATCH] Telemanom path fix
---
 tods/detection_algorithm/Telemanom.py | 708 +++++++++++++++++-----------
 1 file changed, 355 insertions(+), 353 deletions(-)

diff --git a/tods/detection_algorithm/Telemanom.py b/tods/detection_algorithm/Telemanom.py
index 4ffa6ac..12e3aea 100644
--- a/tods/detection_algorithm/Telemanom.py
+++ b/tods/detection_algorithm/Telemanom.py
@@ -53,394 +53,396 @@ Inputs = container.DataFrame
 Outputs = container.DataFrame
 
 class Params(Params_ODBase):
-    ######## Add more Attributes #######
+    ######## Add more Attributes #######
 
-    pass
+    pass
 
 
 class Hyperparams(Hyperparams_ODBase):
+
+    smoothing_perc = hyperparams.Hyperparameter[float](
+        default=0.05,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="determines window size used in EWMA smoothing (percentage of total values for channel)"
+    )
 
-    smoothing_perc = hyperparams.Hyperparameter[float](
-        default=0.05,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="determines window size used in EWMA smoothing (percentage of total values for channel)"
-    )
-
-
-    window_size_ = hyperparams.Hyperparameter[int](
-        default=100,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="number of trailing batches to use in error calculation"
-    )
-    error_buffer = hyperparams.Hyperparameter[int](
-        default=50,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="number of values surrounding an error that are brought into the sequence (promotes grouping on nearby sequences"
-    )
-
-    batch_size = hyperparams.Hyperparameter[int](
-        default=70,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Batch size while predicting"
-    )
+    window_size_ = hyperparams.Hyperparameter[int](
+        default=100,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="number of trailing batches to use in error calculation"
+    )
+    error_buffer = hyperparams.Hyperparameter[int](
+        default=50,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="number of values surrounding an error that are brought into the sequence (promotes grouping of nearby sequences)"
+    )
+
+    batch_size = hyperparams.Hyperparameter[int](
+        default=70,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Batch size used while predicting"
+    )
 
-    # LSTM Model Parameters
-    dropout = hyperparams.Hyperparameter[float](
-        default=0.3,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="Dropout rate"
-    )
-    validation_split = hyperparams.Hyperparameter[float](
-        default=0.2,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Validation split"
-    )
+    # LSTM Model Parameters
 
-    optimizer = hyperparams.Hyperparameter[typing.Union[str, None]](
-        default='Adam',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Optimizer"
-    )
+    dropout = hyperparams.Hyperparameter[float](
+        default=0.3,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="Dropout rate"
+    )
 
-    lstm_batch_size = hyperparams.Hyperparameter[int](
-        default=64,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="lstm model training batch size"
-    )
+    validation_split = hyperparams.Hyperparameter[float](
+        default=0.2,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Validation split"
+    )
+    optimizer = hyperparams.Hyperparameter[typing.Union[str, None]](
+        default='Adam',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Optimizer"
+    )
 
-    loss_metric = hyperparams.Hyperparameter[typing.Union[str, None]](
-        default='mean_squared_error',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="loss function"
-    )
+    lstm_batch_size = hyperparams.Hyperparameter[int](
+        default=64,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="LSTM model training batch size"
+    )
 
-    layers = hyperparams.List(
-        elements=hyperparams.Hyperparameter[int](1),
-        default=[10,10],
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="No of units for the 2 lstm layers"
-    )
-    # Training Parameters
+    loss_metric = hyperparams.Hyperparameter[typing.Union[str, None]](
+        default='mean_squared_error',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="loss function"
+    )
 
-    epochs = hyperparams.Hyperparameter[int](
-        default=1,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="Epoch"
-    )
-    patience = hyperparams.Hyperparameter[int](
-        default=10,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Number of consequetive training iterations to allow without decreasing the val_loss by at least min_delta"
-    )
+    layers = hyperparams.List(
+        elements=hyperparams.Hyperparameter[int](1),
+        default=[10,10],
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="Number of units for the two LSTM layers"
+    )
 
-    min_delta = hyperparams.Hyperparameter[float](
-        default=0.0003,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="Number of consequetive training iterations to allow without decreasing the val_loss by at least min_delta"
-    )
+    # Training Parameters
+    epochs = hyperparams.Hyperparameter[int](
+        default=1,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="Number of training epochs"
+    )
 
-    l_s = hyperparams.Hyperparameter[int](
-        default=100,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="num previous timesteps provided to model to predict future values"
-    )
+    patience = hyperparams.Hyperparameter[int](
+        default=10,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="Number of consecutive training iterations to allow without the val_loss decreasing by at least min_delta"
+    )
 
-    n_predictions = hyperparams.Hyperparameter[int](
-        default=10,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="number of steps ahead to predict"
-    )
+    min_delta = hyperparams.Hyperparameter[float](
+        default=0.0003,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="Minimum decrease in val_loss that counts as an improvement (used together with patience for early stopping)"
+    )
 
-    # Error thresholding parameters
-    # ==================================
-
-    p = hyperparams.Hyperparameter[float](
-        default=0.05,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
-        description="minimum percent decrease between max errors in anomalous sequences (used for pruning)"
-    )
-
-    # Contamination
+    l_s = hyperparams.Hyperparameter[int](
+        default=100,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="number of previous timesteps provided to the model to predict future values"
+    )
 
-    contamination = hyperparams.Uniform(
-        lower=0.,
-        upper=0.5,
-        default=0.1,
-        description='the amount of contamination of the data set, i.e.the proportion of outliers in the data set. Used when fitting to define the threshold on the decision function',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
-    )
+    n_predictions = hyperparams.Hyperparameter[int](
+        default=10,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
+        description="number of steps ahead to predict"
+    )
+
+
+    # Error thresholding parameters
+    # ==================================
+
+    p = hyperparams.Hyperparameter[float](
+        default=0.05,
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
+        description="minimum percent decrease between max errors in anomalous sequences (used for pruning)"
+    )
+
+    # Contamination
+
+    contamination = hyperparams.Uniform(
+        lower=0.,
+        upper=0.5,
+        default=0.1,
+        description='the amount of contamination of the data set, i.e. the proportion of outliers in the data set. Used when fitting to define the threshold on the decision function',
+        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
+    )
 
 
 class TelemanomPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
-    """
-    A primitive that uses telmanom for outlier detection
-
-    Parameters
-    ----------
-
-
-    """
-
-    __author__ = "Data Lab"
-    metadata = metadata_base.PrimitiveMetadata(
-        {
-            '__author__' : "DATA Lab at Texas A&M University",
-            'name': "Telemanom",
-            'python_path': 'd3m.primitives.tods.detection_algorithm.telemanom',
-            'source': {
-                'name': 'DATA Lab at Texas A&M University',
-                'contact': 'mailto:khlai037@tamu.edu',
-                'uris': [
-                    'https://gitlab.com/lhenry15/tods.git',
-                    'https://gitlab.com/lhenry15/tods/-/blob/purav/anomaly-primitives/anomaly_primitives/telemanom.py',
-                ],
-            },
-            'algorithm_types': [
-                metadata_base.PrimitiveAlgorithmType.TELEMANOM,
-            ],
-            'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
-            'id': 'c7259da6-7ce6-42ad-83c6-15238679f5fa',
-            'hyperparameters_to_tune':['layers','loss_metric','optimizer','epochs','p','l_s','patience','min_delta','dropout','smoothing_perc'],
-            'version': '0.0.1',
-        },
-    )
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams, #
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None) -> None:
-
-        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-
-        self._clf = Detector(smoothing_perc=self.hyperparams['smoothing_perc'],
-                             window_size=self.hyperparams['window_size_'],
-                             error_buffer=self.hyperparams['error_buffer'],
-                             batch_size = self.hyperparams['batch_size'],
-                             validation_split = self.hyperparams['validation_split'],
-                             optimizer = self.hyperparams['optimizer'],
-                             lstm_batch_size = self.hyperparams['lstm_batch_size'],
-                             loss_metric = self.hyperparams['loss_metric'],
-                             layers = self.hyperparams['layers'],
-                             epochs = self.hyperparams['epochs'],
-                             patience = self.hyperparams['patience'],
-                             min_delta = self.hyperparams['min_delta'],
-                             l_s = self.hyperparams['l_s'],
-                             n_predictions = self.hyperparams['n_predictions'],
-                             p = self.hyperparams['p'],
-                             contamination=hyperparams['contamination']
-                             )
-
-    def set_training_data(self, *, inputs: Inputs) -> None:
-        """
-        Set training data for outlier detection.
-        Args:
-            inputs: Container DataFrame
-
-        Returns:
-            None
-        """
-        super().set_training_data(inputs=inputs)
-
-    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
-        """
-        Fit model with training data.
-        Args:
-            *: Container DataFrame. Time series data up to fit.
-
-        Returns:
-            None
-        """
-        return super().fit()
-
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
-        """
-        Process the testing data.
-        Args:
-            inputs: Container DataFrame. Time series data up to outlier detection.
-
-        Returns:
-            Container DataFrame
-            1 marks Outliers, 0 marks normal.
-        """
-        return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)
-
-
-    def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
-        """
-        Process the testing data.
-        Args:
-            inputs: Container DataFrame. Time series data up to outlier detection.
-        Returns:
-            Container DataFrame
-            Outlier score of input DataFrame.
-        """
-        return super().produce_score(inputs=inputs, timeout=timeout, iterations=iterations)
-
-
-    def get_params(self) -> Params:
-        """
-        Return parameters.
-        Args:
-            None
-
-        Returns:
-            class Params
-        """
-        return super().get_params()
-
-    def set_params(self, *, params: Params) -> None:
-        """
-        Set parameters for outlier detection.
-        Args:
-            params: class Params
-
-        Returns:
-            None
-        """
-        super().set_params(params=params)
+    """
+    A primitive that uses Telemanom for outlier detection.
+
+    Parameters
+    ----------
+    See the Hyperparams class for the tunable hyperparameters.
+    """
+
+    __author__ = "Data Lab"
+    metadata = metadata_base.PrimitiveMetadata(
+        {
+            '__author__': "DATA Lab at Texas A&M University",
+            'name': "Telemanom",
+            'python_path': 'd3m.primitives.tods.detection_algorithm.telemanom',
+            'source': {
+                'name': 'DATA Lab at Texas A&M University',
+                'contact': 'mailto:khlai037@tamu.edu',
+                'uris': [
+                    'https://gitlab.com/lhenry15/tods.git',
+                    'https://gitlab.com/lhenry15/tods/-/blob/purav/anomaly-primitives/anomaly_primitives/telemanom.py',
+                ],
+            },
+            'algorithm_types': [
+                metadata_base.PrimitiveAlgorithmType.TELEMANOM,
+            ],
+            'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
+            'id': 'c7259da6-7ce6-42ad-83c6-15238679f5fa',
+            'hyperparameters_to_tune': ['layers','loss_metric','optimizer','epochs','p','l_s','patience','min_delta','dropout','smoothing_perc'],
+            'version': '0.0.1',
+        },
+    )
+
+    def __init__(self, *,
+                 hyperparams: Hyperparams,
+                 random_seed: int = 0,
+                 docker_containers: Dict[str, DockerContainer] = None) -> None:
+
+        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
+
+        self._clf = Detector(smoothing_perc=self.hyperparams['smoothing_perc'],
+                             window_size=self.hyperparams['window_size_'],
+                             error_buffer=self.hyperparams['error_buffer'],
+                             batch_size=self.hyperparams['batch_size'],
+                             dropout=self.hyperparams['dropout'],
+                             validation_split=self.hyperparams['validation_split'],
+                             optimizer=self.hyperparams['optimizer'],
+                             lstm_batch_size=self.hyperparams['lstm_batch_size'],
+                             loss_metric=self.hyperparams['loss_metric'],
+                             layers=self.hyperparams['layers'],
+                             epochs=self.hyperparams['epochs'],
+                             patience=self.hyperparams['patience'],
+                             min_delta=self.hyperparams['min_delta'],
+                             l_s=self.hyperparams['l_s'],
+                             n_predictions=self.hyperparams['n_predictions'],
+                             p=self.hyperparams['p'],
+                             contamination=self.hyperparams['contamination']
+                             )
+
+    def set_training_data(self, *, inputs: Inputs) -> None:
+        """
+        Set training data for outlier detection.
+        Args:
+            inputs: Container DataFrame
+
+        Returns:
+            None
+        """
+        super().set_training_data(inputs=inputs)
+
+    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
+        """
+        Fit the model with the training data passed to set_training_data.
+        Args:
+            None
+
+        Returns:
+            None
+        """
+        return super().fit()
+
+    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+        """
+        Process the testing data.
+        Args:
+            inputs: Container DataFrame. Time series data on which to detect outliers.
+
+        Returns:
+            Container DataFrame where 1 marks outliers and 0 marks normal points.
+        """
+        return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)
+
+    def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
+        """
+        Process the testing data.
+        Args:
+            inputs: Container DataFrame. Time series data on which to detect outliers.
+
+        Returns:
+            Container DataFrame with the outlier score of each input row.
+        """
+        return super().produce_score(inputs=inputs, timeout=timeout, iterations=iterations)
+
+    def get_params(self) -> Params:
+        """
+        Return parameters.
+        Args:
+            None
+
+        Returns:
+            Params object
+        """
+        return super().get_params()
+
+    def set_params(self, *, params: Params) -> None:
+        """
+        Set parameters for outlier detection.
+        Args:
+            params: Params object
+
+        Returns:
+            None
+        """
+        super().set_params(params=params)
 
 
 class Detector(CollectiveBaseDetector):
-    """Class to Implement Deep Log LSTM based on "https://www.cs.utah.edu/~lifeifei/papers/deeplog.pdf
-    Only Parameter Value anomaly detection layer has been implemented for time series data"""
-
-    def __init__(self,smoothing_perc=0.05,window_size = 10,error_buffer = 5,batch_size =30, \
-                 dropout = 0.3, validation_split=0.2,optimizer='adam',lstm_batch_size=64,loss_metric='mean_squared_error', \
-                 layers=[40,40],epochs = 1,patience =10,min_delta=0.0003,l_s=5,n_predictions=2,p = 0.05,contamination=0.1):
-
-        # super(Detector, self).__init__(contamination=contamination)
-        super(Detector, self).__init__(contamination=contamination,
-                                       window_size=l_s,
-                                       step_size=1,
-                                       )
-
-        self._smoothin_perc = smoothing_perc
-        self._window_size =window_size
-        self._error_buffer = error_buffer
-        self._batch_size = batch_size
-        self._dropout = dropout
-        self._validation_split = validation_split
-        self._optimizer = optimizer
-        self._lstm_batch_size = lstm_batch_size
-        self._loss_metric = loss_metric
-        self._layers = layers
-        self._epochs = epochs
-        self._patience = patience
-        self._min_delta = min_delta
-        self._l_s = l_s
-        self._n_predictions = n_predictions
-        self._p = p
-        self.contamination = contamination
-
-        # self.y_hat = None
-        self.results = []
-        self.result_df = None
-
-        self._model = None
-        self._channel = None
-
-
-    def fit(self,X,y=None):
-        """
-        Fit data to LSTM model.
-        Args:
-            inputs : X , ndarray of size (number of sample,features)
-
-        Returns:
-            return : self object with trained model
-        """
-        X = check_array(X).astype(np.float)
-        self._set_n_classes(None)
-
-        inputs = X
-        self._channel = Channel(n_predictions = self._n_predictions,l_s = self._l_s)
-        self._channel.shape_train_data(inputs)
-
-        self._model = Model(self._channel,patience = self._patience,
-                            min_delta =self._min_delta,
-                            layers = self._layers,
-                            dropout = self._dropout,
-                            n_predictions = self._n_predictions,
-                            loss_metric = self._loss_metric,
-                            optimizer = self._optimizer,
-                            lstm_batch_size = self._lstm_batch_size,
-                            epochs = self._epochs,
-                            validation_split = self._validation_split,
-                            batch_size = self._batch_size,
-                            l_s = self._l_s
-                            )
-
-        self.decision_scores_, self.left_inds_, self.right_inds_ = self.decision_function(X)
-        self._process_decision_scores()
-
-        return self
-
-
-
-    def decision_function(self, X: np.array):
-        """Predict raw anomaly scores of X using the fitted detector.
-
-        The anomaly score of an input sample is computed based on the fitted
-        detector. For consistency, outliers are assigned with
-        higher anomaly scores.
-
-        Parameters
-        ----------
-        X : numpy array of shape (n_samples, n_features)
-            The input samples. Sparse matrices are accepted only
-            if they are supported by the base estimator.
-
-        Returns
-        -------
-        anomaly_scores : numpy array of shape (n_samples,)
-            The anomaly score of the input samples.
-        """
-
-        X = check_array(X).astype(np.float)
-        self._set_n_classes(None)
-
-        inputs = X
-        self._channel.shape_test_data(inputs)
-        self._channel = self._model.batch_predict(channel = self._channel)
-
-        errors = Errors(channel = self._channel,
-                        window_size = self._window_size,
-                        batch_size = self._batch_size,
-                        smoothing_perc = self._smoothin_perc,
-                        n_predictions = self._n_predictions,
-                        l_s = self._l_s,
-                        error_buffer = self._error_buffer,
-                        p = self._p
-                        )
-
-        # prediciton smoothed error
-        prediction_errors = np.reshape(errors.e_s,(self._channel.X_test.shape[0],self._channel.X_test.shape[2]))
-        prediction_errors = np.sum(prediction_errors,axis=1)
-
-        left_indices = []
-        right_indices = []
-        scores = []
-        for i in range(len(prediction_errors)):
-            left_indices.append(i)
-            right_indices.append(i+self._l_s)
-            scores.append(prediction_errors[i])
-
-
-
-        return np.asarray(scores),np.asarray(left_indices),np.asarray(right_indices)
+    """LSTM-based detector for multivariate time series following the Telemanom
+    approach (Hundman et al., KDD 2018): an LSTM forecasts future values and
+    anomaly scores are derived from the smoothed prediction errors."""
+
+    def __init__(self, smoothing_perc=0.05, window_size=10, error_buffer=5, batch_size=30,
+                 dropout=0.3, validation_split=0.2, optimizer='adam', lstm_batch_size=64, loss_metric='mean_squared_error',
+                 layers=[40,40], epochs=1, patience=10, min_delta=0.0003, l_s=5, n_predictions=2, p=0.05, contamination=0.1):
+
+        super(Detector, self).__init__(contamination=contamination,
+                                       window_size=l_s,
+                                       step_size=1,
+                                       )
+
+        self._smoothing_perc = smoothing_perc
+        self._window_size = window_size
+        self._error_buffer = error_buffer
+        self._batch_size = batch_size
+        self._dropout = dropout
+        self._validation_split = validation_split
+        self._optimizer = optimizer
+        self._lstm_batch_size = lstm_batch_size
+        self._loss_metric = loss_metric
+        self._layers = layers
+        self._epochs = epochs
+        self._patience = patience
+        self._min_delta = min_delta
+        self._l_s = l_s
+        self._n_predictions = n_predictions
+        self._p = p
+        self.contamination = contamination
+
+        # self.y_hat = None
+        self.results = []
+        self.result_df = None
+
+        self._model = None
+        self._channel = None
+
+    def fit(self, X, y=None):
+        """
+        Fit the LSTM model to the data.
+        Args:
+            X : ndarray of shape (n_samples, n_features)
+
+        Returns:
+            self : object with the trained model
+        """
+        X = check_array(X).astype(np.float64)
+        self._set_n_classes(None)
+
+        inputs = X
+        self._channel = Channel(n_predictions=self._n_predictions, l_s=self._l_s)
+        self._channel.shape_train_data(inputs)
+
+        self._model = Model(self._channel, patience=self._patience,
+                            min_delta=self._min_delta,
+                            layers=self._layers,
+                            dropout=self._dropout,
+                            n_predictions=self._n_predictions,
+                            loss_metric=self._loss_metric,
+                            optimizer=self._optimizer,
+                            lstm_batch_size=self._lstm_batch_size,
+                            epochs=self._epochs,
+                            validation_split=self._validation_split,
+                            batch_size=self._batch_size,
+                            l_s=self._l_s
+                            )
+
+        self.decision_scores_, self.left_inds_, self.right_inds_ = self.decision_function(X)
+        self._process_decision_scores()
+
+        return self
+
+    def decision_function(self, X: np.array):
+        """Predict raw anomaly scores of X using the fitted detector.
+
+        The anomaly score of an input sample is computed based on the fitted
+        detector. For consistency, outliers are assigned
+        higher anomaly scores.
+
+        Parameters
+        ----------
+        X : numpy array of shape (n_samples, n_features)
+            The input samples. Sparse matrices are accepted only
+            if they are supported by the base estimator.
+
+        Returns
+        -------
+        anomaly_scores : numpy array of shape (n_samples,)
+            The anomaly score of the input samples.
+        """
+        X = check_array(X).astype(np.float64)
+        self._set_n_classes(None)
+
+        inputs = X
+        self._channel.shape_test_data(inputs)
+        self._channel = self._model.batch_predict(channel=self._channel)
+
+        errors = Errors(channel=self._channel,
+                        window_size=self._window_size,
+                        batch_size=self._batch_size,
+                        smoothing_perc=self._smoothing_perc,
+                        n_predictions=self._n_predictions,
+                        l_s=self._l_s,
+                        error_buffer=self._error_buffer,
+                        p=self._p
+                        )
+
+        # smoothed prediction errors, summed across features to one score per window
+        prediction_errors = np.reshape(errors.e_s, (self._channel.X_test.shape[0], self._channel.X_test.shape[2]))
+        prediction_errors = np.sum(prediction_errors, axis=1)
+
+        # each score covers the window [i, i + l_s) of the input series
+        left_indices = []
+        right_indices = []
+        scores = []
+        for i in range(len(prediction_errors)):
+            left_indices.append(i)
+            right_indices.append(i + self._l_s)
+            scores.append(prediction_errors[i])
+
+        return np.asarray(scores), np.asarray(left_indices), np.asarray(right_indices)
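Note on l_s and n_predictions: the Channel helper that shapes the data is not part of this patch, so the sketch below only illustrates the windowing these two hyperparameters imply, as an assumption based on the upstream telemanom design (each LSTM input is l_s consecutive timesteps, the target is the next n_predictions values). The flattened multi-feature target and the shape_windows name are illustrative, not the exact Channel layout.

import numpy as np

def shape_windows(values: np.ndarray, l_s: int, n_predictions: int):
    """Slice a (n_timesteps, n_features) series into LSTM inputs and targets:
    each input is l_s consecutive timesteps, each target the next n_predictions
    values of every feature, flattened."""
    n = values.shape[0]
    X, y = [], []
    for i in range(n - l_s - n_predictions + 1):
        X.append(values[i:i + l_s])                                 # (l_s, n_features) input window
        y.append(values[i + l_s:i + l_s + n_predictions].ravel())   # next n_predictions steps
    return np.asarray(X), np.asarray(y)

series = np.random.randn(500, 3)    # toy channel: 500 timesteps, 3 features
X, y = shape_windows(series, l_s=100, n_predictions=10)
print(X.shape, y.shape)             # (391, 100, 3) (391, 30)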
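A minimal smoke test of the Detector class above, assuming the module's telemanom helpers (Channel, Model, Errors) and their Keras dependency are installed; the synthetic series, injected burst, and tiny training settings (epochs=1, patience=2) are illustrative only.

import numpy as np

rng = np.random.RandomState(42)
series = np.sin(np.linspace(0, 40 * np.pi, 2000)).reshape(-1, 1) + 0.05 * rng.randn(2000, 1)
series[1500:1510] += 3.0            # inject a short anomalous burst

det = Detector(l_s=50, n_predictions=5, epochs=1, patience=2, contamination=0.01)
det.fit(series)                     # trains the LSTM and fills decision_scores_

scores, left, right = det.decision_function(series)
top = np.argsort(scores)[-5:]       # windows with the largest smoothed errors
for i in top:
    print(f"window [{left[i]}, {right[i]}) score={scores[i]:.4f}")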
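At the primitive level, usage might look like the following sketch. It assumes a plain numeric container DataFrame is acceptable input (real TODS pipelines usually run dataset-to-dataframe and column-parsing primitives first), and the exact container and metadata handling depends on the installed d3m version.

import numpy as np
from d3m import container

rng = np.random.RandomState(0)
df = container.DataFrame(rng.randn(300, 1), generate_metadata=True)

# override a few defaults; Hyperparams.defaults() and replace() come from the d3m base classes
hp = Hyperparams.defaults().replace({'epochs': 2, 'l_s': 50, 'n_predictions': 5})
primitive = TelemanomPrimitive(hyperparams=hp)
primitive.set_training_data(inputs=df)
primitive.fit()
labels = primitive.produce(inputs=df).value        # 1 = outlier, 0 = normal
scores = primitive.produce_score(inputs=df).value  # raw outlier scores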