
Telemanom bug fix

master
Purav Zumkhawala committed 4 years ago
commit a5baa5ed3b
1 changed file with 360 additions and 358 deletions:

tods/detection_algorithm/Telemanom.py (+360, −358)

@@ -27,9 +27,9 @@ from d3m.primitive_interfaces.base import CallResult, DockerContainer

 from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
 from d3m.primitive_interfaces.transformer import TransformerPrimitiveBase
-from .UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase
+from detection_algorithm.UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase

-from .core.CollectiveBase import CollectiveBaseDetector
+from detection_algorithm.core.CollectiveBase import CollectiveBaseDetector

 from sklearn.utils import check_array

@@ -39,9 +39,9 @@ from d3m import exceptions

 # from detection_algorithm.UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase

-from .core.utils.errors import Errors
-from .core.utils.channel import Channel
-from .core.utils.modeling import Model
+from detection_algorithm.core.utils.errors import Errors
+from detection_algorithm.core.utils.channel import Channel
+from detection_algorithm.core.utils.modeling import Model

 # from pyod.models.base import BaseDetector

@@ -53,394 +53,396 @@ Inputs = container.DataFrame
Outputs = container.DataFrame


class Params(Params_ODBase):
    ######## Add more Attributes #######

    pass


class Hyperparams(Hyperparams_ODBase):

    smoothing_perc = hyperparams.Hyperparameter[float](
        default=0.05,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="determines window size used in EWMA smoothing (percentage of total values for channel)"
    )

    window_size_ = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="number of trailing batches to use in error calculation"
    )

    error_buffer = hyperparams.Hyperparameter[int](
        default=50,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="number of values surrounding an error that are brought into the sequence (promotes grouping on nearby sequences"
    )

    batch_size = hyperparams.Hyperparameter[int](
        default=70,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Batch size while predicting"
    )

    # LSTM Model Parameters
    dropout = hyperparams.Hyperparameter[float](
        default=0.3,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="Dropout rate"
    )

    validation_split = hyperparams.Hyperparameter[float](
        default=0.2,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Validation split"
    )

    optimizer = hyperparams.Hyperparameter[typing.Union[str, None]](
        default='Adam',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Optimizer"
    )

    lstm_batch_size = hyperparams.Hyperparameter[int](
        default=64,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="lstm model training batch size"
    )

    loss_metric = hyperparams.Hyperparameter[typing.Union[str, None]](
        default='mean_squared_error',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="loss function"
    )

    layers = hyperparams.List(
        elements=hyperparams.Hyperparameter[int](1),
        default=[10,10],
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="No of units for the 2 lstm layers"
    )

    # Training Parameters
    epochs = hyperparams.Hyperparameter[int](
        default=1,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="Epoch"
    )

    patience = hyperparams.Hyperparameter[int](
        default=10,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Number of consequetive training iterations to allow without decreasing the val_loss by at least min_delta"
    )

    min_delta = hyperparams.Hyperparameter[float](
        default=0.0003,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="Number of consequetive training iterations to allow without decreasing the val_loss by at least min_delta"
    )

    l_s = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="num previous timesteps provided to model to predict future values"
    )

    n_predictions = hyperparams.Hyperparameter[int](
        default=10,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="number of steps ahead to predict"
    )

    # Error thresholding parameters
    # ==================================
    p = hyperparams.Hyperparameter[float](
        default=0.05,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="minimum percent decrease between max errors in anomalous sequences (used for pruning)"
    )

    # Contamination
    contamination = hyperparams.Uniform(
        lower=0.,
        upper=0.5,
        default=0.1,
        description='the amount of contamination of the data set, i.e.the proportion of outliers in the data set. Used when fitting to define the threshold on the decision function',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
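
A minimal sketch of overriding a few of these tunables, assuming the standard d3m Hyperparams.defaults() and .replace() helpers (this usage is illustrative and not part of the commit):

# Hypothetical usage sketch: start from the defaults and override selected tunables.
custom_hyperparams = Hyperparams.defaults().replace({
    'epochs': 5,            # train the LSTM for more than the default single epoch
    'l_s': 50,              # shorter input window than the default 100 timesteps
    'smoothing_perc': 0.1,  # wider EWMA smoothing window
})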






class TelemanomPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
    """
    A primitive that uses telmanom for outlier detection

    Parameters
    ----------

    """

    __author__ = "Data Lab"
    metadata = metadata_base.PrimitiveMetadata(
        {
            '__author__' : "DATA Lab at Texas A&M University",
            'name': "Telemanom",
            'python_path': 'd3m.primitives.tods.detection_algorithm.telemanom',
            'source': {
                'name': 'DATA Lab at Texas A&M University',
                'contact': 'mailto:khlai037@tamu.edu',
                'uris': [
                    'https://gitlab.com/lhenry15/tods.git',
                    'https://gitlab.com/lhenry15/tods/-/blob/purav/anomaly-primitives/anomaly_primitives/telemanom.py',
                ],
            },
            'algorithm_types': [
                metadata_base.PrimitiveAlgorithmType.TELEMANOM,
            ],
            'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
            'id': 'c7259da6-7ce6-42ad-83c6-15238679f5fa',
            'hyperparameters_to_tune': ['layers','loss_metric','optimizer','epochs','p','l_s','patience','min_delta','dropout','smoothing_perc'],
            'version': '0.0.1',
        },
    )

    def __init__(self, *,
                 hyperparams: Hyperparams, #
                 random_seed: int = 0,
                 docker_containers: Dict[str, DockerContainer] = None) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

        self._clf = Detector(smoothing_perc=self.hyperparams['smoothing_perc'],
                             window_size=self.hyperparams['window_size_'],
                             error_buffer=self.hyperparams['error_buffer'],
                             batch_size = self.hyperparams['batch_size'],
                             validation_split = self.hyperparams['validation_split'],
                             optimizer = self.hyperparams['optimizer'],
                             lstm_batch_size = self.hyperparams['lstm_batch_size'],
                             loss_metric = self.hyperparams['loss_metric'],
                             layers = self.hyperparams['layers'],
                             epochs = self.hyperparams['epochs'],
                             patience = self.hyperparams['patience'],
                             min_delta = self.hyperparams['min_delta'],
                             l_s = self.hyperparams['l_s'],
                             n_predictions = self.hyperparams['n_predictions'],
                             p = self.hyperparams['p'],
                             contamination=hyperparams['contamination']
                             )

    def set_training_data(self, *, inputs: Inputs) -> None:
        """
        Set training data for outlier detection.
        Args:
            inputs: Container DataFrame
        Returns:
            None
        """
        super().set_training_data(inputs=inputs)

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """
        Fit model with training data.
        Args:
            *: Container DataFrame. Time series data up to fit.
        Returns:
            None
        """
        return super().fit()

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Process the testing data.
        Args:
            inputs: Container DataFrame. Time series data up to outlier detection.
        Returns:
            Container DataFrame
            1 marks Outliers, 0 marks normal.
        """
        return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)

    def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Process the testing data.
        Args:
            inputs: Container DataFrame. Time series data up to outlier detection.
        Returns:
            Container DataFrame
            Outlier score of input DataFrame.
        """
        return super().produce_score(inputs=inputs, timeout=timeout, iterations=iterations)

    def get_params(self) -> Params:
        """
        Return parameters.
        Args:
            None
        Returns:
            class Params
        """
        return super().get_params()

    def set_params(self, *, params: Params) -> None:
        """
        Set parameters for outlier detection.
        Args:
            params: class Params
        Returns:
            None
        """
        super().set_params(params=params)
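
A minimal end-to-end sketch of the d3m primitive interface shown above; train_df and test_df are placeholder container DataFrames of time-series values, and the sketch is illustrative rather than part of the commit:

# Hypothetical usage sketch of the primitive API defined above.
primitive = TelemanomPrimitive(hyperparams=Hyperparams.defaults())
primitive.set_training_data(inputs=train_df)             # train_df: placeholder container.DataFrame
primitive.fit()
labels = primitive.produce(inputs=test_df).value         # 1 marks outliers, 0 marks normal
scores = primitive.produce_score(inputs=test_df).value   # raw outlier scores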
"""
A primitive that uses telmanom for outlier detection
Parameters
----------
"""
__author__ = "Data Lab"
metadata = metadata_base.PrimitiveMetadata(
{
'__author__' : "DATA Lab at Texas A&M University",
'name': "Telemanom",
'python_path': 'd3m.primitives.tods.detection_algorithm.telemanom',
'source': {
'name': 'DATA Lab at Texas A&M University',
'contact': 'mailto:khlai037@tamu.edu',
'uris': [
'https://gitlab.com/lhenry15/tods.git',
'https://gitlab.com/lhenry15/tods/-/blob/purav/anomaly-primitives/anomaly_primitives/telemanom.py',
],
},
'algorithm_types': [
metadata_base.PrimitiveAlgorithmType.TELEMANOM,
],
'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
'id': 'c7259da6-7ce6-42ad-83c6-15238679f5fa',
'hyperparameters_to_tune':['layers','loss_metric','optimizer','epochs','p','l_s','patience','min_delta','dropout','smoothing_perc'],
'version': '0.0.1',
},
)
def __init__(self, *,
hyperparams: Hyperparams, #
random_seed: int = 0,
docker_containers: Dict[str, DockerContainer] = None) -> None:
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
self._clf = Detector(smoothing_perc=self.hyperparams['smoothing_perc'],
window_size=self.hyperparams['window_size_'],
error_buffer=self.hyperparams['error_buffer'],
batch_size = self.hyperparams['batch_size'],
validation_split = self.hyperparams['validation_split'],
optimizer = self.hyperparams['optimizer'],
lstm_batch_size = self.hyperparams['lstm_batch_size'],
loss_metric = self.hyperparams['loss_metric'],
layers = self.hyperparams['layers'],
epochs = self.hyperparams['epochs'],
patience = self.hyperparams['patience'],
min_delta = self.hyperparams['min_delta'],
l_s = self.hyperparams['l_s'],
n_predictions = self.hyperparams['n_predictions'],
p = self.hyperparams['p'],
contamination=hyperparams['contamination']
)
def set_training_data(self, *, inputs: Inputs) -> None:
"""
Set training data for outlier detection.
Args:
inputs: Container DataFrame
Returns:
None
"""
super().set_training_data(inputs=inputs)
def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
"""
Fit model with training data.
Args:
*: Container DataFrame. Time series data up to fit.
Returns:
None
"""
return super().fit()
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
inputs: Container DataFrame. Time series data up to outlier detection.
Returns:
Container DataFrame
1 marks Outliers, 0 marks normal.
"""
return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)
def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
inputs: Container DataFrame. Time series data up to outlier detection.
Returns:
Container DataFrame
Outlier score of input DataFrame.
"""
return super().produce_score(inputs=inputs, timeout=timeout, iterations=iterations)
def get_params(self) -> Params:
"""
Return parameters.
Args:
None
Returns:
class Params
"""
return super().get_params()
def set_params(self, *, params: Params) -> None:
"""
Set parameters for outlier detection.
Args:
params: class Params
Returns:
None
"""
super().set_params(params=params)






class Detector(CollectiveBaseDetector):
    """Class to Implement Deep Log LSTM based on "https://www.cs.utah.edu/~lifeifei/papers/deeplog.pdf
    Only Parameter Value anomaly detection layer has been implemented for time series data"""

    def __init__(self, smoothing_perc=0.05, window_size=10, error_buffer=5, batch_size=30, \
                 dropout=0.3, validation_split=0.2, optimizer='adam', lstm_batch_size=64, loss_metric='mean_squared_error', \
                 layers=[40,40], epochs=1, patience=10, min_delta=0.0003, l_s=5, n_predictions=2, p=0.05, contamination=0.1):
        # super(Detector, self).__init__(contamination=contamination)
        super(Detector, self).__init__(contamination=contamination,
                                       window_size=l_s,
                                       step_size=1,
                                       )
        self._smoothin_perc = smoothing_perc
        self._window_size = window_size
        self._error_buffer = error_buffer
        self._batch_size = batch_size
        self._dropout = dropout
        self._validation_split = validation_split
        self._optimizer = optimizer
        self._lstm_batch_size = lstm_batch_size
        self._loss_metric = loss_metric
        self._layers = layers
        self._epochs = epochs
        self._patience = patience
        self._min_delta = min_delta
        self._l_s = l_s
        self._n_predictions = n_predictions
        self._p = p
        self.contamination = contamination
        # self.y_hat = None
        self.results = []
        self.result_df = None
        self._model = None
        self._channel = None

    def fit(self, X, y=None):
        """
        Fit data to LSTM model.
        Args:
            inputs : X , ndarray of size (number of sample,features)
        Returns:
            return : self object with trained model
        """
        X = check_array(X).astype(np.float)
        self._set_n_classes(None)

        inputs = X
        self._channel = Channel(n_predictions = self._n_predictions, l_s = self._l_s)
        self._channel.shape_train_data(inputs)

        self._model = Model(self._channel, patience = self._patience,
                            min_delta = self._min_delta,
                            layers = self._layers,
                            dropout = self._dropout,
                            n_predictions = self._n_predictions,
                            loss_metric = self._loss_metric,
                            optimizer = self._optimizer,
                            lstm_batch_size = self._lstm_batch_size,
                            epochs = self._epochs,
                            validation_split = self._validation_split,
                            batch_size = self._batch_size,
                            l_s = self._l_s
                            )

        self.decision_scores_, self.left_inds_, self.right_inds_ = self.decision_function(X)
        self._process_decision_scores()
        return self

    def decision_function(self, X: np.array):
        """Predict raw anomaly scores of X using the fitted detector.
        The anomaly score of an input sample is computed based on the fitted
        detector. For consistency, outliers are assigned with
        higher anomaly scores.
        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.
        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """
        X = check_array(X).astype(np.float)
        self._set_n_classes(None)

        inputs = X
        self._channel.shape_test_data(inputs)
        self._channel = self._model.batch_predict(channel = self._channel)

        errors = Errors(channel = self._channel,
                        window_size = self._window_size,
                        batch_size = self._batch_size,
                        smoothing_perc = self._smoothin_perc,
                        n_predictions = self._n_predictions,
                        l_s = self._l_s,
                        error_buffer = self._error_buffer,
                        p = self._p
                        )

        # prediciton smoothed error
        prediction_errors = np.reshape(errors.e_s, (self._channel.X_test.shape[0], self._channel.X_test.shape[2]))
        prediction_errors = np.sum(prediction_errors, axis=1)

        left_indices = []
        right_indices = []
        scores = []
        for i in range(len(prediction_errors)):
            left_indices.append(i)
            right_indices.append(i + self._l_s)
            scores.append(prediction_errors[i])

        return np.asarray(scores), np.asarray(left_indices), np.asarray(right_indices)
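
A minimal sketch of driving Detector directly with a plain numpy array, following the fit() and decision_function() signatures above; the synthetic data and chosen parameter values are illustrative only:

# Hypothetical usage sketch of the Detector class defined above.
import numpy as np

X = np.random.rand(200, 3)                       # (n_samples, n_features) time-series values
det = Detector(l_s=50, n_predictions=10, epochs=1)
det.fit(X)                                       # trains the LSTM and fills decision_scores_
scores, left, right = det.decision_function(X)   # per-window scores with left/right indices
print(scores.shape, left[:5], right[:5])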
"""Class to Implement Deep Log LSTM based on "https://www.cs.utah.edu/~lifeifei/papers/deeplog.pdf
Only Parameter Value anomaly detection layer has been implemented for time series data"""
def __init__(self,smoothing_perc=0.05,window_size = 10,error_buffer = 5,batch_size =30, \
dropout = 0.3, validation_split=0.2,optimizer='adam',lstm_batch_size=64,loss_metric='mean_squared_error', \
layers=[40,40],epochs = 1,patience =10,min_delta=0.0003,l_s=5,n_predictions=2,p = 0.05,contamination=0.1):
# super(Detector, self).__init__(contamination=contamination)
super(Detector, self).__init__(contamination=contamination,
window_size=l_s,
step_size=1,
)
self._smoothin_perc = smoothing_perc
self._window_size =window_size
self._error_buffer = error_buffer
self._batch_size = batch_size
self._dropout = dropout
self._validation_split = validation_split
self._optimizer = optimizer
self._lstm_batch_size = lstm_batch_size
self._loss_metric = loss_metric
self._layers = layers
self._epochs = epochs
self._patience = patience
self._min_delta = min_delta
self._l_s = l_s
self._n_predictions = n_predictions
self._p = p
self.contamination = contamination
# self.y_hat = None
self.results = []
self.result_df = None
self._model = None
self._channel = None
def fit(self,X,y=None):
"""
Fit data to LSTM model.
Args:
inputs : X , ndarray of size (number of sample,features)
Returns:
return : self object with trained model
"""
X = check_array(X).astype(np.float)
self._set_n_classes(None)
inputs = X
self._channel = Channel(n_predictions = self._n_predictions,l_s = self._l_s)
self._channel.shape_train_data(inputs)
self._model = Model(self._channel,patience = self._patience,
min_delta =self._min_delta,
layers = self._layers,
dropout = self._dropout,
n_predictions = self._n_predictions,
loss_metric = self._loss_metric,
optimizer = self._optimizer,
lstm_batch_size = self._lstm_batch_size,
epochs = self._epochs,
validation_split = self._validation_split,
batch_size = self._batch_size,
l_s = self._l_s
)
self.decision_scores_, self.left_inds_, self.right_inds_ = self.decision_function(X)
self._process_decision_scores()
return self
def decision_function(self, X: np.array):
"""Predict raw anomaly scores of X using the fitted detector.
The anomaly score of an input sample is computed based on the fitted
detector. For consistency, outliers are assigned with
higher anomaly scores.
Parameters
----------
X : numpy array of shape (n_samples, n_features)
The input samples. Sparse matrices are accepted only
if they are supported by the base estimator.
Returns
-------
anomaly_scores : numpy array of shape (n_samples,)
The anomaly score of the input samples.
"""
X = check_array(X).astype(np.float)
self._set_n_classes(None)
inputs = X
self._channel.shape_test_data(inputs)
self._channel = self._model.batch_predict(channel = self._channel)
errors = Errors(channel = self._channel,
window_size = self._window_size,
batch_size = self._batch_size,
smoothing_perc = self._smoothin_perc,
n_predictions = self._n_predictions,
l_s = self._l_s,
error_buffer = self._error_buffer,
p = self._p
)
# prediciton smoothed error
prediction_errors = np.reshape(errors.e_s,(self._channel.X_test.shape[0],self._channel.X_test.shape[2]))
prediction_errors = np.sum(prediction_errors,axis=1)
left_indices = []
right_indices = []
scores = []
for i in range(len(prediction_errors)):
left_indices.append(i)
right_indices.append(i+self._l_s)
scores.append(prediction_errors[i])
return np.asarray(scores),np.asarray(left_indices),np.asarray(right_indices)







