
Merge pull request #3 from datamllab/dev

Telemanom path fix

Branch: master
Henry GitHub committed 4 years ago
commit 7bf996aa88
1 changed file with 355 additions and 353 deletions

tods/detection_algorithm/Telemanom.py  +355 −353

@@ -53,394 +53,396 @@ Inputs = container.DataFrame
Outputs = container.DataFrame


class Params(Params_ODBase):
    ######## Add more Attributes #######

    pass




class Hyperparams(Hyperparams_ODBase):


    smoothing_perc = hyperparams.Hyperparameter[float](
        default=0.05,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="determines window size used in EWMA smoothing (percentage of total values for channel)"
    )

    window_size_ = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="number of trailing batches to use in error calculation"
    )

    error_buffer = hyperparams.Hyperparameter[int](
        default=50,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="number of values surrounding an error that are brought into the sequence (promotes grouping on nearby sequences)"
    )

    batch_size = hyperparams.Hyperparameter[int](
        default=70,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Batch size while predicting"
    )

    # LSTM Model Parameters
    dropout = hyperparams.Hyperparameter[float](
        default=0.3,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="Dropout rate"
    )

    validation_split = hyperparams.Hyperparameter[float](
        default=0.2,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Validation split"
    )

    optimizer = hyperparams.Hyperparameter[typing.Union[str, None]](
        default='Adam',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Optimizer"
    )

    lstm_batch_size = hyperparams.Hyperparameter[int](
        default=64,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="LSTM model training batch size"
    )

    loss_metric = hyperparams.Hyperparameter[typing.Union[str, None]](
        default='mean_squared_error',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="loss function"
    )

    layers = hyperparams.List(
        elements=hyperparams.Hyperparameter[int](1),
        default=[10, 10],
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="Number of units for the 2 LSTM layers"
    )

    # Training Parameters
    epochs = hyperparams.Hyperparameter[int](
        default=1,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="Number of training epochs"
    )

    patience = hyperparams.Hyperparameter[int](
        default=10,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Number of consecutive training iterations to allow without decreasing the val_loss by at least min_delta"
    )

    min_delta = hyperparams.Hyperparameter[float](
        default=0.0003,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="Minimum decrease in val_loss for a training iteration to count as an improvement (used together with patience for early stopping)"
    )

    l_s = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="num previous timesteps provided to model to predict future values"
    )

    n_predictions = hyperparams.Hyperparameter[int](
        default=10,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="number of steps ahead to predict"
    )

    # Error thresholding parameters
    # ==================================
    p = hyperparams.Hyperparameter[float](
        default=0.05,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        description="minimum percent decrease between max errors in anomalous sequences (used for pruning)"
    )

    # Contamination
    contamination = hyperparams.Uniform(
        lower=0.,
        upper=0.5,
        default=0.1,
        description='the amount of contamination of the data set, i.e. the proportion of outliers in the data set. Used when fitting to define the threshold on the decision function',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
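
# For reference, a minimal sketch of overriding a few of these hyperparameters (illustrative
# only, not part of Telemanom.py). It assumes the standard d3m hyperparams API, where
# Hyperparams.defaults() returns a default configuration and replace() yields a modified copy;
# the chosen values are arbitrary, not recommendations.
example_hyperparams = Hyperparams.defaults().replace({
    'epochs': 5,          # train for more than the default single epoch
    'layers': [64, 64],   # units for the two LSTM layers
    'l_s': 100,           # length of the input window fed to the model
    'n_predictions': 10,  # number of steps ahead to predict
})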






class TelemanomPrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
"""
A primitive that uses telmanom for outlier detection

Parameters
----------


"""

__author__ = "Data Lab"
metadata = metadata_base.PrimitiveMetadata(
{
'__author__' : "DATA Lab at Texas A&M University",
'name': "Telemanom",
'python_path': 'd3m.primitives.tods.detection_algorithm.telemanom',
'source': {
'name': 'DATA Lab at Texas A&M University',
'contact': 'mailto:khlai037@tamu.edu',
'uris': [
'https://gitlab.com/lhenry15/tods.git',
'https://gitlab.com/lhenry15/tods/-/blob/purav/anomaly-primitives/anomaly_primitives/telemanom.py',
],
},
'algorithm_types': [
metadata_base.PrimitiveAlgorithmType.TELEMANOM,
],
'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
'id': 'c7259da6-7ce6-42ad-83c6-15238679f5fa',
'hyperparameters_to_tune':['layers','loss_metric','optimizer','epochs','p','l_s','patience','min_delta','dropout','smoothing_perc'],
'version': '0.0.1',
},
)
def __init__(self, *,
hyperparams: Hyperparams, #
random_seed: int = 0,
docker_containers: Dict[str, DockerContainer] = None) -> None:
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
self._clf = Detector(smoothing_perc=self.hyperparams['smoothing_perc'],
window_size=self.hyperparams['window_size_'],
error_buffer=self.hyperparams['error_buffer'],
batch_size = self.hyperparams['batch_size'],
validation_split = self.hyperparams['validation_split'],
optimizer = self.hyperparams['optimizer'],
lstm_batch_size = self.hyperparams['lstm_batch_size'],
loss_metric = self.hyperparams['loss_metric'],
layers = self.hyperparams['layers'],
epochs = self.hyperparams['epochs'],
patience = self.hyperparams['patience'],
min_delta = self.hyperparams['min_delta'],
l_s = self.hyperparams['l_s'],
n_predictions = self.hyperparams['n_predictions'],
p = self.hyperparams['p'],
contamination=hyperparams['contamination']
)
def set_training_data(self, *, inputs: Inputs) -> None:
"""
Set training data for outlier detection.
Args:
inputs: Container DataFrame
Returns:
None
"""
super().set_training_data(inputs=inputs)
def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
"""
Fit model with training data.
Args:
*: Container DataFrame. Time series data up to fit.
Returns:
None
"""
return super().fit()
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
inputs: Container DataFrame. Time series data up to outlier detection.
Returns:
Container DataFrame
1 marks Outliers, 0 marks normal.
"""
return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)
def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
inputs: Container DataFrame. Time series data up to outlier detection.
Returns:
Container DataFrame
Outlier score of input DataFrame.
"""
return super().produce_score(inputs=inputs, timeout=timeout, iterations=iterations)
def get_params(self) -> Params:
"""
Return parameters.
Args:
None
Returns:
class Params
"""
return super().get_params()
def set_params(self, *, params: Params) -> None:
"""
Set parameters for outlier detection.
Args:
params: class Params
Returns:
None
"""
super().set_params(params=params)
"""
A primitive that uses telmanom for outlier detection
Parameters
----------
"""
__author__ = "Data Lab"
metadata = metadata_base.PrimitiveMetadata(
{
'__author__' : "DATA Lab at Texas A&M University",
'name': "Telemanom",
'python_path': 'd3m.primitives.tods.detection_algorithm.telemanom',
'source': {
'name': 'DATA Lab at Texas A&M University',
'contact': 'mailto:khlai037@tamu.edu',
'uris': [
'https://gitlab.com/lhenry15/tods.git',
'https://gitlab.com/lhenry15/tods/-/blob/purav/anomaly-primitives/anomaly_primitives/telemanom.py',
],
},
'algorithm_types': [
metadata_base.PrimitiveAlgorithmType.TELEMANOM,
],
'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
'id': 'c7259da6-7ce6-42ad-83c6-15238679f5fa',
'hyperparameters_to_tune':['layers','loss_metric','optimizer','epochs','p','l_s','patience','min_delta','dropout','smoothing_perc'],
'version': '0.0.1',
},
)
def __init__(self, *,
hyperparams: Hyperparams, #
random_seed: int = 0,
docker_containers: Dict[str, DockerContainer] = None) -> None:
super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
self._clf = Detector(smoothing_perc=self.hyperparams['smoothing_perc'],
window_size=self.hyperparams['window_size_'],
error_buffer=self.hyperparams['error_buffer'],
batch_size = self.hyperparams['batch_size'],
validation_split = self.hyperparams['validation_split'],
optimizer = self.hyperparams['optimizer'],
lstm_batch_size = self.hyperparams['lstm_batch_size'],
loss_metric = self.hyperparams['loss_metric'],
layers = self.hyperparams['layers'],
epochs = self.hyperparams['epochs'],
patience = self.hyperparams['patience'],
min_delta = self.hyperparams['min_delta'],
l_s = self.hyperparams['l_s'],
n_predictions = self.hyperparams['n_predictions'],
p = self.hyperparams['p'],
contamination=hyperparams['contamination']
)
def set_training_data(self, *, inputs: Inputs) -> None:
"""
Set training data for outlier detection.
Args:
inputs: Container DataFrame
Returns:
None
"""
super().set_training_data(inputs=inputs)
def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
"""
Fit model with training data.
Args:
*: Container DataFrame. Time series data up to fit.
Returns:
None
"""
return super().fit()
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
inputs: Container DataFrame. Time series data up to outlier detection.
Returns:
Container DataFrame
1 marks Outliers, 0 marks normal.
"""
return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)
def produce_score(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
"""
Process the testing data.
Args:
inputs: Container DataFrame. Time series data up to outlier detection.
Returns:
Container DataFrame
Outlier score of input DataFrame.
"""
return super().produce_score(inputs=inputs, timeout=timeout, iterations=iterations)
def get_params(self) -> Params:
"""
Return parameters.
Args:
None
Returns:
class Params
"""
return super().get_params()
def set_params(self, *, params: Params) -> None:
"""
Set parameters for outlier detection.
Args:
params: class Params
Returns:
None
"""
super().set_params(params=params)
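
# A minimal end-to-end usage sketch of the primitive (illustrative only, not part of Telemanom.py).
# It assumes `train_df` and `test_df` are d3m container DataFrames of time-series values prepared
# by upstream TODS pipeline steps, and that Hyperparams.defaults() follows the standard d3m API.
primitive = TelemanomPrimitive(hyperparams=Hyperparams.defaults())
primitive.set_training_data(inputs=train_df)
primitive.fit()                                           # trains the underlying LSTM detector
labels = primitive.produce(inputs=test_df).value          # 1 marks outliers, 0 marks normal
scores = primitive.produce_score(inputs=test_df).value    # raw anomaly score per window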






class Detector(CollectiveBaseDetector):
"""Class to Implement Deep Log LSTM based on "https://www.cs.utah.edu/~lifeifei/papers/deeplog.pdf
Only Parameter Value anomaly detection layer has been implemented for time series data"""
def __init__(self,smoothing_perc=0.05,window_size = 10,error_buffer = 5,batch_size =30, \
dropout = 0.3, validation_split=0.2,optimizer='adam',lstm_batch_size=64,loss_metric='mean_squared_error', \
layers=[40,40],epochs = 1,patience =10,min_delta=0.0003,l_s=5,n_predictions=2,p = 0.05,contamination=0.1):
# super(Detector, self).__init__(contamination=contamination)
super(Detector, self).__init__(contamination=contamination,
window_size=l_s,
step_size=1,
)
self._smoothin_perc = smoothing_perc
self._window_size =window_size
self._error_buffer = error_buffer
self._batch_size = batch_size
self._dropout = dropout
self._validation_split = validation_split
self._optimizer = optimizer
self._lstm_batch_size = lstm_batch_size
self._loss_metric = loss_metric
self._layers = layers
self._epochs = epochs
self._patience = patience
self._min_delta = min_delta
self._l_s = l_s
self._n_predictions = n_predictions
self._p = p
self.contamination = contamination
# self.y_hat = None
self.results = []
self.result_df = None
self._model = None
self._channel = None
def fit(self,X,y=None):
"""
Fit data to LSTM model.
Args:
inputs : X , ndarray of size (number of sample,features)
Returns:
return : self object with trained model
"""
X = check_array(X).astype(np.float)
self._set_n_classes(None)
inputs = X
self._channel = Channel(n_predictions = self._n_predictions,l_s = self._l_s)
self._channel.shape_train_data(inputs)
self._model = Model(self._channel,patience = self._patience,
min_delta =self._min_delta,
layers = self._layers,
dropout = self._dropout,
n_predictions = self._n_predictions,
loss_metric = self._loss_metric,
optimizer = self._optimizer,
lstm_batch_size = self._lstm_batch_size,
epochs = self._epochs,
validation_split = self._validation_split,
batch_size = self._batch_size,
l_s = self._l_s
)
self.decision_scores_, self.left_inds_, self.right_inds_ = self.decision_function(X)
self._process_decision_scores()
return self
def decision_function(self, X: np.array):
"""Predict raw anomaly scores of X using the fitted detector.
The anomaly score of an input sample is computed based on the fitted
detector. For consistency, outliers are assigned with
higher anomaly scores.
Parameters
----------
X : numpy array of shape (n_samples, n_features)
The input samples. Sparse matrices are accepted only
if they are supported by the base estimator.
Returns
-------
anomaly_scores : numpy array of shape (n_samples,)
The anomaly score of the input samples.
"""
X = check_array(X).astype(np.float)
self._set_n_classes(None)
inputs = X
self._channel.shape_test_data(inputs)
self._channel = self._model.batch_predict(channel = self._channel)
errors = Errors(channel = self._channel,
window_size = self._window_size,
batch_size = self._batch_size,
smoothing_perc = self._smoothin_perc,
n_predictions = self._n_predictions,
l_s = self._l_s,
error_buffer = self._error_buffer,
p = self._p
)
# prediciton smoothed error
prediction_errors = np.reshape(errors.e_s,(self._channel.X_test.shape[0],self._channel.X_test.shape[2]))
prediction_errors = np.sum(prediction_errors,axis=1)
left_indices = []
right_indices = []
scores = []
for i in range(len(prediction_errors)):
left_indices.append(i)
right_indices.append(i+self._l_s)
scores.append(prediction_errors[i])
return np.asarray(scores),np.asarray(left_indices),np.asarray(right_indices)
"""Class to Implement Deep Log LSTM based on "https://www.cs.utah.edu/~lifeifei/papers/deeplog.pdf
Only Parameter Value anomaly detection layer has been implemented for time series data"""
def __init__(self,smoothing_perc=0.05,window_size = 10,error_buffer = 5,batch_size =30, \
dropout = 0.3, validation_split=0.2,optimizer='adam',lstm_batch_size=64,loss_metric='mean_squared_error', \
layers=[40,40],epochs = 1,patience =10,min_delta=0.0003,l_s=5,n_predictions=2,p = 0.05,contamination=0.1):
# super(Detector, self).__init__(contamination=contamination)
super(Detector, self).__init__(contamination=contamination,
window_size=l_s,
step_size=1,
)
self._smoothin_perc = smoothing_perc
self._window_size =window_size
self._error_buffer = error_buffer
self._batch_size = batch_size
self._dropout = dropout
self._validation_split = validation_split
self._optimizer = optimizer
self._lstm_batch_size = lstm_batch_size
self._loss_metric = loss_metric
self._layers = layers
self._epochs = epochs
self._patience = patience
self._min_delta = min_delta
self._l_s = l_s
self._n_predictions = n_predictions
self._p = p
self.contamination = contamination
# self.y_hat = None
self.results = []
self.result_df = None
self._model = None
self._channel = None
def fit(self,X,y=None):
"""
Fit data to LSTM model.
Args:
inputs : X , ndarray of size (number of sample,features)
Returns:
return : self object with trained model
"""
X = check_array(X).astype(np.float)
self._set_n_classes(None)
inputs = X
self._channel = Channel(n_predictions = self._n_predictions,l_s = self._l_s)
self._channel.shape_train_data(inputs)
self._model = Model(self._channel,patience = self._patience,
min_delta =self._min_delta,
layers = self._layers,
dropout = self._dropout,
n_predictions = self._n_predictions,
loss_metric = self._loss_metric,
optimizer = self._optimizer,
lstm_batch_size = self._lstm_batch_size,
epochs = self._epochs,
validation_split = self._validation_split,
batch_size = self._batch_size,
l_s = self._l_s
)
self.decision_scores_, self.left_inds_, self.right_inds_ = self.decision_function(X)
self._process_decision_scores()
return self
def decision_function(self, X: np.array):
"""Predict raw anomaly scores of X using the fitted detector.
The anomaly score of an input sample is computed based on the fitted
detector. For consistency, outliers are assigned with
higher anomaly scores.
Parameters
----------
X : numpy array of shape (n_samples, n_features)
The input samples. Sparse matrices are accepted only
if they are supported by the base estimator.
Returns
-------
anomaly_scores : numpy array of shape (n_samples,)
The anomaly score of the input samples.
"""
X = check_array(X).astype(np.float)
self._set_n_classes(None)
inputs = X
self._channel.shape_test_data(inputs)
self._channel = self._model.batch_predict(channel = self._channel)
errors = Errors(channel = self._channel,
window_size = self._window_size,
batch_size = self._batch_size,
smoothing_perc = self._smoothin_perc,
n_predictions = self._n_predictions,
l_s = self._l_s,
error_buffer = self._error_buffer,
p = self._p
)
# prediciton smoothed error
prediction_errors = np.reshape(errors.e_s,(self._channel.X_test.shape[0],self._channel.X_test.shape[2]))
prediction_errors = np.sum(prediction_errors,axis=1)
left_indices = []
right_indices = []
scores = []
for i in range(len(prediction_errors)):
left_indices.append(i)
right_indices.append(i+self._l_s)
scores.append(prediction_errors[i])
return np.asarray(scores),np.asarray(left_indices),np.asarray(right_indices)
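
# A minimal sketch of using Detector directly on a plain numpy array, outside the d3m wrapper
# (illustrative only, not part of Telemanom.py). It assumes the PyOD-style CollectiveBaseDetector
# exposes labels_ after fitting; the synthetic series and small parameter values are arbitrary.
import numpy as np

rng = np.random.RandomState(0)
series = np.sin(np.linspace(0, 20, 500)).reshape(-1, 1) + 0.05 * rng.randn(500, 1)
series[300:310] += 3.0                                    # inject a short anomalous burst

det = Detector(l_s=50, n_predictions=5, epochs=1, layers=[16, 16], contamination=0.05)
det.fit(series)                                           # trains the LSTM and computes training scores
scores, left, right = det.decision_function(series)       # one score per [left, right) window
labels = det.labels_                                      # 0/1 labels from the contamination threshold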







