From 0cb2f051719cd39ecc0560640ee172d39d50c554 Mon Sep 17 00:00:00 2001 From: lhenry15 Date: Tue, 1 Jun 2021 11:28:54 -0500 Subject: [PATCH] modify default window size to 10 and fix autoregression and LSTM --- examples/sk_examples/LSTMOD_test.py | 52 ++++++++++++++++++++++++ examples/sk_examples/Telemanom_test.py | 2 +- tods/detection_algorithm/LSTMODetect.py | 8 ++-- tods/detection_algorithm/UODBasePrimitive.py | 2 +- tods/detection_algorithm/core/AutoRegOD.py | 27 +++++++++++++ tods/detection_algorithm/core/LSTMOD.py | 54 +++++++++++++++++++++++-- tods/detection_algorithm/core/MultiAutoRegOD.py | 27 +++++++++++++ 7 files changed, 163 insertions(+), 9 deletions(-) create mode 100644 examples/sk_examples/LSTMOD_test.py diff --git a/examples/sk_examples/LSTMOD_test.py b/examples/sk_examples/LSTMOD_test.py new file mode 100644 index 0000000..51bf1e7 --- /dev/null +++ b/examples/sk_examples/LSTMOD_test.py @@ -0,0 +1,52 @@ +import numpy as np +from tods.sk_interface.detection_algorithm.LSTMODetector_skinterface import LSTMODetectorSKI +from sklearn.metrics import precision_recall_curve +from sklearn.metrics import accuracy_score +from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report +import matplotlib.pyplot as plt +from sklearn import metrics + +#prepare the data +data = np.loadtxt("./500_UCR_Anomaly_robotDOG1_10000_19280_19360.txt") + +X_train = np.expand_dims(data[:10000], axis=1) +X_test = np.expand_dims(data[10000:], axis=1) + +transformer = LSTMODetectorSKI() +transformer.fit(X_train) + +prediction_labels_train = transformer.predict(X_train) + +prediction_labels = transformer.predict(X_test) +prediction_score = transformer.predict_score(X_test) + +print("Prediction Labels\n", prediction_labels) +print("Prediction Score\n", prediction_score) + +# y_true = prediction_labels_train[:1000] +# y_pred = prediction_labels[:1000] +y_true = prediction_labels_train +y_pred = prediction_labels + +print('Accuracy Score: ', accuracy_score(y_true, y_pred)) + +confusion_matrix(y_true, y_pred) + +print(classification_report(y_true, y_pred)) + +precision, recall, thresholds = precision_recall_curve(y_true, y_pred) +f1_scores = 2*recall*precision/(recall+precision) + +print('Best threshold: ', thresholds[np.argmax(f1_scores)]) +print('Best F1-Score: ', np.max(f1_scores)) + +fpr, tpr, threshold = metrics.roc_curve(y_true, y_pred) +roc_auc = metrics.auc(fpr, tpr) + +plt.title('ROC') +plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc) +plt.legend(loc = 'lower right') +plt.ylabel('True Positive Rate') +plt.xlabel('False Positive Rate') +plt.show() diff --git a/examples/sk_examples/Telemanom_test.py b/examples/sk_examples/Telemanom_test.py index d5f1ae0..261ae8a 100644 --- a/examples/sk_examples/Telemanom_test.py +++ b/examples/sk_examples/Telemanom_test.py @@ -1,5 +1,5 @@ import numpy as np -from tods.tods_skinterface.primitiveSKI.detection_algorithm.Telemanom_skinterface import TelemanomSKI +from tods.sk_interface.detection_algorithm.Telemanom_skinterface import TelemanomSKI from sklearn.metrics import precision_recall_curve from sklearn.metrics import accuracy_score from sklearn.metrics import confusion_matrix diff --git a/tods/detection_algorithm/LSTMODetect.py b/tods/detection_algorithm/LSTMODetect.py index 37d2edb..2bc3c27 100755 --- a/tods/detection_algorithm/LSTMODetect.py +++ b/tods/detection_algorithm/LSTMODetect.py @@ -68,7 +68,7 @@ class Hyperparams(Hyperparams_ODBase): ) min_attack_time = hyperparams.Hyperparameter[int]( - default=5, + default=10, description='The minimum amount of recent time steps that is used to define a collective attack.', semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] ) @@ -97,7 +97,7 @@ class Hyperparams(Hyperparams_ODBase): ) epochs = hyperparams.Hyperparameter[int]( - default=10, + default=50, description='Number of epochs to train the model.', semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] ) @@ -123,13 +123,13 @@ class Hyperparams(Hyperparams_ODBase): ) hidden_dim = hyperparams.Hyperparameter[int]( - default=16, + default=8, description='Hidden dim of LSTM.', semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] ) n_hidden_layer = hyperparams.Hyperparameter[int]( - default=0, + default=2, description='Hidden layer number of LSTM.', semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] ) diff --git a/tods/detection_algorithm/UODBasePrimitive.py b/tods/detection_algorithm/UODBasePrimitive.py index eee177d..8ca2463 100755 --- a/tods/detection_algorithm/UODBasePrimitive.py +++ b/tods/detection_algorithm/UODBasePrimitive.py @@ -83,7 +83,7 @@ class Hyperparams_ODBase(hyperparams.Hyperparams): ) window_size = hyperparams.Hyperparameter[int]( - default=1, + default=10, description='The moving window size.', semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] ) diff --git a/tods/detection_algorithm/core/AutoRegOD.py b/tods/detection_algorithm/core/AutoRegOD.py index bffd7f4..a2286dc 100644 --- a/tods/detection_algorithm/core/AutoRegOD.py +++ b/tods/detection_algorithm/core/AutoRegOD.py @@ -103,6 +103,33 @@ class AutoRegOD(CollectiveBaseDetector): self._process_decision_scores() return self + def predict(self, X): # pragma: no cover + """Predict if a particular sample is an outlier or not. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + Returns + ------- + outlier_labels : numpy array of shape (n_samples,) + For each observation, tells whether or not + it should be considered as an outlier according to the + fitted model. 0 stands for inliers and 1 for outliers. + """ + + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + + pred_score, X_left_inds, X_right_inds = self.decision_function(X) + + pred_score = np.concatenate((np.zeros((self.window_size,)), pred_score)) + X_left_inds = np.concatenate((np.zeros((self.window_size,)), X_left_inds)) + X_right_inds = np.concatenate((np.zeros((self.window_size,)), X_right_inds)) + + return (pred_score > self.threshold_).astype( + 'int').ravel(), X_left_inds.ravel(), X_right_inds.ravel() + def decision_function(self, X: np.array): """Predict raw anomaly scores of X using the fitted detector. diff --git a/tods/detection_algorithm/core/LSTMOD.py b/tods/detection_algorithm/core/LSTMOD.py index d66478c..8edb792 100755 --- a/tods/detection_algorithm/core/LSTMOD.py +++ b/tods/detection_algorithm/core/LSTMOD.py @@ -33,7 +33,7 @@ class LSTMOutlierDetector(CollectiveBaseDetector): ): super(LSTMOutlierDetector, self).__init__(contamination=contamination, - window_size=min_attack_time, + # window_size=min_attack_time, step_size=1, ) @@ -54,14 +54,34 @@ class LSTMOutlierDetector(CollectiveBaseDetector): self.activation = activation + # def _build_model(self): + # print('dim:', self.hidden_dim, self.feature_dim) + # model_ = Sequential() + # model_.add(LSTM(units=self.hidden_dim, input_shape=(self.feature_dim, 1), + # dropout=self.dropout_rate, activation=self.activation, return_sequences=True)) + + # for layer_idx in range(self.n_hidden_layer-1): + # model_.add(LSTM(units=self.hidden_dim, input_shape=(self.hidden_dim, 1), + # dropout=self.dropout_rate, activation=self.activation, return_sequences=True)) + + # model_.add(LSTM(units=self.hidden_dim, input_shape=(self.hidden_dim, 1), + # dropout=self.dropout_rate, activation=self.activation)) + + # model_.add(Dense(units=self.feature_dim, input_shape=(self.hidden_dim, 1), activation=None)) + + # model_.compile(loss=self.loss, optimizer=self.optimizer) + # return model_ + def _build_model(self): model_ = Sequential() model_.add(LSTM(units=self.hidden_dim, input_shape=(self.feature_dim, 1), - dropout=self.dropout_rate, activation=self.activation)) + dropout=self.dropout_rate, activation=self.activation, + return_sequences=bool(self.n_hidden_layer>0))) for layer_idx in range(self.n_hidden_layer): model_.add(LSTM(units=self.hidden_dim, input_shape=(self.hidden_dim, 1), - dropout=self.dropout_rate, activation=self.activation)) + dropout=self.dropout_rate, activation=self.activation, + return_sequences=bool(layer_idx < self.n_hidden_layer - 1))) model_.add(Dense(units=self.feature_dim, input_shape=(self.hidden_dim, 1), activation=None)) @@ -84,6 +104,7 @@ class LSTMOutlierDetector(CollectiveBaseDetector): self : object Fitted estimator. """ + print("XXXX:", X.shape) X = check_array(X).astype(np.float) self._set_n_classes(None) X_buf, y_buf = self._get_sub_matrices(X) @@ -121,6 +142,33 @@ class LSTMOutlierDetector(CollectiveBaseDetector): return relative_error + def predict(self, X): # pragma: no cover + """Predict if a particular sample is an outlier or not. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + Returns + ------- + outlier_labels : numpy array of shape (n_samples,) + For each observation, tells whether or not + it should be considered as an outlier according to the + fitted model. 0 stands for inliers and 1 for outliers. + """ + + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + + pred_score, X_left_inds, X_right_inds = self.decision_function(X) + + pred_score = np.concatenate((np.zeros((self.window_size,)), pred_score)) + X_left_inds = np.concatenate((np.zeros((self.window_size,)), X_left_inds)) + X_right_inds = np.concatenate((np.zeros((self.window_size,)), X_right_inds)) + + return (pred_score > self.threshold_).astype( + 'int').ravel(), X_left_inds.ravel(), X_right_inds.ravel() + def decision_function(self, X: np.array): """Predict raw anomaly scores of X using the fitted detector. diff --git a/tods/detection_algorithm/core/MultiAutoRegOD.py b/tods/detection_algorithm/core/MultiAutoRegOD.py index 8c9ff76..d1f6a07 100644 --- a/tods/detection_algorithm/core/MultiAutoRegOD.py +++ b/tods/detection_algorithm/core/MultiAutoRegOD.py @@ -157,6 +157,33 @@ class MultiAutoRegOD(CollectiveBaseDetector): self._process_decision_scores() return self + def predict(self, X): # pragma: no cover + """Predict if a particular sample is an outlier or not. + + Parameters + ---------- + X : numpy array of shape (n_samples, n_features) + The input samples. + + Returns + ------- + outlier_labels : numpy array of shape (n_samples,) + For each observation, tells whether or not + it should be considered as an outlier according to the + fitted model. 0 stands for inliers and 1 for outliers. + """ + + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + + pred_score, X_left_inds, X_right_inds = self.decision_function(X) + + pred_score = np.concatenate((np.zeros((self.window_size,)), pred_score)) + X_left_inds = np.concatenate((np.zeros((self.window_size,)), X_left_inds)) + X_right_inds = np.concatenate((np.zeros((self.window_size,)), X_right_inds)) + + return (pred_score > self.threshold_).astype( + 'int').ravel(), X_left_inds.ravel(), X_right_inds.ravel() + def decision_function(self, X: np.array): """Predict raw anomaly scores of X using the fitted detector.