
fix sk_interface examples and Matrix profile

master
lhenry15 committed 4 years ago
commit e478dd06dc
6 changed files with 213 additions and 224 deletions
1. examples/sk_examples/DeepLog_test.py (+0, -2)
2. examples/sk_examples/IsolationForest_test.py (+1, -8)
3. examples/sk_examples/MatrixProfile_test.py (+1, -8)
4. examples/sk_examples/Telemanom_test.py (+0, -1)
5. tods/detection_algorithm/MatrixProfile.py (+211, -199)
6. tods/detection_algorithm/UODBasePrimitive.py (+0, -6)

examples/sk_examples/DeepLog_test.py (+0, -2)

@@ -1,6 +1,5 @@
 import numpy as np
 from tods.sk_interface.detection_algorithm.DeepLog_skinterface import DeepLogSKI
-#from tods.tods_skinterface.primitiveSKI.detection_algorithm.DeepLog_skinterface import DeepLogSKI
 from sklearn.metrics import precision_recall_curve
 from sklearn.metrics import accuracy_score
 from sklearn.metrics import confusion_matrix
@@ -22,7 +21,6 @@ prediction_labels_train = transformer.predict(X_train)
 prediction_labels = transformer.predict(X_test)
 prediction_score = transformer.predict_score(X_test)
 
-print("Primitive: ", transformer.primitive)
 print("Prediction Labels\n", prediction_labels)
 print("Prediction Score\n", prediction_score)
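
These example scripts share one fit/predict pattern against the corrected tods.sk_interface import path. A minimal sketch of that pattern, assuming synthetic data and default hyperparameters (the constructor call, the fit call, and the array shapes are illustrative, not taken from this diff):

import numpy as np
from tods.sk_interface.detection_algorithm.DeepLog_skinterface import DeepLogSKI

# Illustrative univariate series; shapes assumed to be (n_samples, n_features).
X_train = np.array([[3.0], [4.0], [8.6], [13.0], [22.5], [17.0], [19.2], [36.1]])
X_test = np.array([[3.0], [4.0], [8.6], [13.0], [127.0], [-23.0], [59.2], [-10.0]])

transformer = DeepLogSKI()                             # default hyperparameters assumed
transformer.fit(X_train)                               # unsupervised fit on the training series
prediction_labels = transformer.predict(X_test)        # 1 marks outliers, 0 marks normal
prediction_score = transformer.predict_score(X_test)   # continuous anomaly scores

print("Prediction Labels\n", prediction_labels)
print("Prediction Score\n", prediction_score)
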
examples/sk_examples/IsolationForest_test.py (+1, -8)

@@ -1,5 +1,5 @@
 import numpy as np
-from tods.tods_skinterface.primitiveSKI.detection_algorithm.IsolationForest_skinterface import IsolationForestSKI
+from tods.sk_interface.detection_algorithm.IsolationForest_skinterface import IsolationForestSKI
 from sklearn.metrics import precision_recall_curve
 from sklearn.metrics import accuracy_score
 from sklearn.metrics import confusion_matrix
@@ -24,7 +24,6 @@ prediction_labels_train = transformer.predict(X_train)
 prediction_labels = transformer.predict(X_test)
 prediction_score = transformer.predict_score(X_test)
 
-print("Primitive: ", transformer.primitive)
 print("Prediction Labels\n", prediction_labels)
 print("Prediction Score\n", prediction_score)

@@ -36,9 +35,3 @@ print('Accuracy Score: ', accuracy_score(y_true, y_pred))
 confusion_matrix(y_true, y_pred)
 
 print(classification_report(y_true, y_pred))
-
-precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
-f1_scores = 2*recall*precision/(recall+precision)
-
-print('Best threshold: ', thresholds[np.argmax(f1_scores)])
-print('Best F1-Score: ', np.max(f1_scores))

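The six lines deleted at the end of this file computed an F1-maximizing threshold from the precision-recall curve. For reference, a standalone sketch of that computation; the toy labels and scores and the division-by-zero guard are additions, not part of the original script:

import numpy as np
from sklearn.metrics import precision_recall_curve

y_true = np.array([0, 0, 1, 0, 1, 0, 0, 1])                   # toy ground truth
y_score = np.array([0.1, 0.2, 0.9, 0.3, 0.7, 0.2, 0.1, 0.8])  # toy anomaly scores

precision, recall, thresholds = precision_recall_curve(y_true, y_score)
f1_scores = 2 * recall * precision / (recall + precision + 1e-12)  # epsilon guard added

# precision_recall_curve returns one more precision/recall entry than thresholds,
# so restrict the argmax to indices that have a matching threshold.
best = np.argmax(f1_scores[:-1])
print('Best threshold: ', thresholds[best])
print('Best F1-Score: ', f1_scores[best])
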
examples/sk_examples/MatrixProfile_test.py (+1, -8)

@@ -1,5 +1,5 @@
 import numpy as np
-from tods.tods_skinterface.primitiveSKI.detection_algorithm.MatrixProfile_skinterface import MatrixProfileSKI
+from tods.sk_interface.detection_algorithm.MatrixProfile_skinterface import MatrixProfileSKI
 from sklearn.metrics import precision_recall_curve
 from sklearn.metrics import accuracy_score
 from sklearn.metrics import confusion_matrix
@@ -17,7 +17,6 @@ prediction_labels_train = transformer.predict(X_train)
 prediction_labels = transformer.predict(X_test)
 prediction_score = transformer.predict_score(X_test)
 
-print("Primitive: ", transformer.primitive)
 print("Prediction Labels\n", prediction_labels)
 print("Prediction Score\n", prediction_score)

@@ -29,9 +28,3 @@ print('Accuracy Score: ', accuracy_score(y_true, y_pred))
 confusion_matrix(y_true, y_pred)
 
 print(classification_report(y_true, y_pred))
-
-precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
-f1_scores = 2*recall*precision/(recall+precision)
-
-print('Best threshold: ', thresholds[np.argmax(f1_scores)])
-print('Best F1-Score: ', np.max(f1_scores))

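Beyond the shared pattern, the matrix profile wrapper exposes the window size tuned below in MatrixProfile.py. A sketch assuming the SKI constructor forwards window_size to the primitive's hyperparameter of the same name; that pass-through is an assumption, not shown in this diff:

import numpy as np
from tods.sk_interface.detection_algorithm.MatrixProfile_skinterface import MatrixProfileSKI

# Univariate series with one obvious discord near the end.
X = np.array([[1.0], [2.0], [3.0], [2.0], [1.0], [2.0],
              [3.0], [2.0], [50.0], [2.0], [3.0], [2.0]])

transformer = MatrixProfileSKI(window_size=3)  # assumed kwarg mirroring the hyperparameter
transformer.fit(X)
print("Prediction Labels\n", transformer.predict(X))
print("Prediction Score\n", transformer.predict_score(X))
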
examples/sk_examples/Telemanom_test.py (+0, -1)

@@ -25,7 +25,6 @@ prediction_labels_train = transformer.predict(X_train)
 prediction_labels = transformer.predict(X_test)
 prediction_score = transformer.predict_score(X_test)
 
-print("Primitive: ", transformer.primitive)
 print("Prediction Labels\n", prediction_labels)
 print("Prediction Score\n", prediction_score)
 y_true = prediction_labels_train


tods/detection_algorithm/MatrixProfile.py (+211, -199)

@@ -34,6 +34,8 @@ import pandas
 import uuid
 
 from d3m import container, utils as d3m_utils
+from .core.CollectiveBase import CollectiveBaseDetector
+from .core.utility import get_sub_matrices
 
 from .UODBasePrimitive import Params_ODBase, Hyperparams_ODBase, UnsupervisedOutlierDetectorBase
 import stumpy
@@ -47,216 +49,226 @@ Outputs = d3m_dataframe
 
 class Params(Params_ODBase):
     ######## Add more Attributes #######
     pass
 
 
 class Hyperparams(Hyperparams_ODBase):
     ######## Add more Attributes #######
     #pass
     window_size = hyperparams.Hyperparameter[int](
         default=3,
         description='The moving window size.',
         semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
     )
 
-class MP:
-    """
-    This is the class for matrix profile function
-    """
-    def __init__(self, window_size, step_size):
-        self._window_size = window_size
-        self._step_size = step_size
-        return
-
-    def fit(self, X, y=None):
-        """Fit detector. y is ignored in unsupervised methods.
-        Parameters
-        ----------
-        X : numpy array of shape (n_samples, n_features)
-            The input samples.
-        y : Ignored
-            Not used, present for API consistency by convention.
-        Returns
-        -------
-        self : object
-            Fitted estimator.
-        """
-        # validate inputs X and y (optional)
-        # X = check_array(X)
-        # self._set_n_classes(y)
-        # self.decision_scores_ = self.decision_function(X)
-        # self._process_decision_scores()
-        return self
-
-    def _get_right_inds(self, data):
-        right_inds = []
-        for row in data[1]:
-            right_inds.append(row+self._window_size-1)
-        right_inds = pd.DataFrame(right_inds)
-        data = pd.concat([data,right_inds], axis=1)
-        data.columns = range(0,len(data.columns))
-        return data
-
-    def produce(self, data):
-        """
-        Args:
-            data: dataframe column
-        Returns:
-            nparray
-        """
-        """
-        #only keep first two columns of MP results, the second column is left index, use windowsize to get right index
-        transformed_columns=utils.pandas.DataFrame()
-        for col in data.transpose(): #data.reshape(1,len(data)):
-            output = stumpy.stump(col, m = self._window_size)
-            output = pd.DataFrame(output)
-            output=output.drop(columns=[2,3])
-            output = self._get_right_inds(output)
-            transformed_columns=pd.concat([transformed_columns,output], axis=1)
-        return transformed_columns
-        """
-        #data = np.random.rand(3, 1000)
-        #data = np.array([[1., 2., 3., 4.], [5., 6., 7., 8.], [9., 10., 11., 12.]])
-        #data = np.array([1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.])
-        #data = np.array([[3., 4., 8.6, 13., 22.5, 17, 19.2, 36.1, 127, -23, 59.2, -10],[3., 4., 8.6, 13., 22.5, 17, 19.2, 36.1, 127, -23, 59.2, -10],[3., 4., 8.6, 13., 22.5, 17, 19.2, 36.1, 127, -23, 59.2, -10]])
-
-        matrix_profile, matrix_profile_indices = stumpy.mstump(data.transpose(), m = self._window_size)
-        #matrix_profile, matrix_profile_indices = stumpy.mstump(data, m = self._window_size)
-
-        left_inds_ = numpy.arange(0, len(matrix_profile), self._step_size)
-        right_inds_ = left_inds_ + self._window_size
-        right_inds_[right_inds_ > len(matrix_profile)] = len(matrix_profile)
-        left_inds_ = np.array([left_inds_]).transpose()
-        right_inds_ = np.array([right_inds_]).transpose()
-        # apply min-max scaling
-        scaler = MinMaxScaler()
-        scaler = scaler.fit(matrix_profile)
-        matrix_profile = scaler.transform(matrix_profile)
-        output = []
-        for timestamp in matrix_profile:
-            timestamp = sum(timestamp)
-            output.append([timestamp])
-
-        output = np.concatenate((output, left_inds_, right_inds_),axis=1)
-        return output
-
-    def predict(self, data):
-        return self.produce(data)
+class MP(CollectiveBaseDetector):
+    """
+    This is the class for matrix profile function
+    """
+    def __init__(self, window_size, step_size, contamination):
+        self._window_size = window_size
+        self._step_size = step_size
+        self.contamination = contamination
+        return
+
+    def _get_right_inds(self, data):
+        right_inds = []
+        for row in data[1]:
+            right_inds.append(row+self._window_size-1)
+        right_inds = pd.DataFrame(right_inds)
+        data = pd.concat([data,right_inds], axis=1)
+        data.columns = range(0,len(data.columns))
+        return data
+
+    def fit(self, X):
+        """Fit detector. y is ignored in unsupervised methods.
+        Parameters
+        ----------
+        X : numpy array of shape (n_samples, n_features)
+            The input samples.
+        y : Ignored
+            Not used, present for API consistency by convention.
+        Returns
+        -------
+        self : object
+            Fitted estimator.
+        """
+        sub_matrices, self.left_inds_, self.right_inds_ = get_sub_matrices(
+            X,
+            window_size=self._window_size,
+            step=self._step_size,
+            return_numpy=True,
+            flatten=True)
+        sub_matrices = sub_matrices[:-1, :]
+        self.left_inds_ = self.left_inds_[:-1]
+        self.right_inds_ = self.right_inds_[:-1]
+        matrix_profile, matrix_profile_indices = stumpy.mstump(X.transpose(), m = self._window_size)
+        #matrix_profile, matrix_profile_indices = stumpy.mstump(data, m = self._window_size)
+
+        #left_inds_ = numpy.arange(0, len(matrix_profile), self._step_size)
+        #right_inds_ = left_inds_ + self._window_size
+        #right_inds_[right_inds_ > len(matrix_profile)] = len(matrix_profile)
+        #left_inds_ = np.array([left_inds_]).transpose()
+        #right_inds_ = np.array([right_inds_]).transpose()
+        # apply min-max scaling
+        scaler = MinMaxScaler()
+        scaler = scaler.fit(matrix_profile)
+        matrix_profile = scaler.transform(matrix_profile)
+        self.decision_scores_ = matrix_profile
+        self._process_decision_scores()
+        return self
+
+    def decision_function(self, data):
+        """
+        Args:
+            data: dataframe column
+        Returns:
+            nparray
+        """
+        """
+        #only keep first two columns of MP results, the second column is left index, use windowsize to get right index
+        transformed_columns=utils.pandas.DataFrame()
+        for col in data.transpose(): #data.reshape(1,len(data)):
+            output = stumpy.stump(col, m = self._window_size)
+            output = pd.DataFrame(output)
+            output=output.drop(columns=[2,3])
+            output = self._get_right_inds(output)
+            transformed_columns=pd.concat([transformed_columns,output], axis=1)
+        return transformed_columns
+        """
+        matrix_profile, matrix_profile_indices = stumpy.mstump(data.transpose(), m = self._window_size)
+        #matrix_profile, matrix_profile_indices = stumpy.mstump(data, m = self._window_size)
+
+        left_inds_ = numpy.arange(0, len(matrix_profile), self._step_size)
+        right_inds_ = left_inds_ + self._window_size
+        right_inds_[right_inds_ > len(matrix_profile)] = len(matrix_profile)
+        left_inds_ = np.array([left_inds_]).transpose()
+        right_inds_ = np.array([right_inds_]).transpose()
+        # apply min-max scaling
+        scaler = MinMaxScaler()
+        scaler = scaler.fit(matrix_profile)
+        matrix_profile = scaler.transform(matrix_profile)
+        #output = []
+        #for timestamp in matrix_profile:
+        #    timestamp = sum(timestamp)
+        #    output.append([timestamp])
+
+        #output = np.concatenate((output, left_inds_, right_inds_),axis=1)
+        return matrix_profile, left_inds_, right_inds_
 
 class MatrixProfilePrimitive(UnsupervisedOutlierDetectorBase[Inputs, Outputs, Params, Hyperparams]):
     """
     A primitive that performs matrix profile on a DataFrame using the Stumpy package
     Stumpy documentation: https://stumpy.readthedocs.io/en/latest/index.html
     Parameters
     ----------
     T_A : ndarray
         The time series or sequence for which to compute the matrix profile
     m : int
         Window size
     T_B : ndarray
         The time series or sequence that contain your query subsequences
         of interest. Default is `None` which corresponds to a self-join.
     ignore_trivial : bool
         Set to `True` if this is a self-join. Otherwise, for AB-join, set this
         to `False`. Default is `True`.
     Returns
     -------
     out : ndarray
         The first column consists of the matrix profile, the second column
         consists of the matrix profile indices, the third column consists of
         the left matrix profile indices, and the fourth column consists of
         the right matrix profile indices.
     """
     metadata = metadata_base.PrimitiveMetadata({
         '__author__': "DATA Lab @Texas A&M University",
         'name': "Matrix Profile",
         'python_path': 'd3m.primitives.tods.detection_algorithm.matrix_profile',
         'source': {
             'name': "DATA Lab @Texas A&M University",
             'contact': 'mailto:khlai037@tamu.edu',
         },
         'hyperparams_to_tune': ['window_size'],
         'version': '0.0.2',
         'algorithm_types': [
             metadata_base.PrimitiveAlgorithmType.TODS_PRIMITIVE,
         ],
         'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION,
         'id': str(uuid.uuid3(uuid.NAMESPACE_DNS, 'MatrixProfilePrimitive')),
     })
 
     def __init__(self, *,
                  hyperparams: Hyperparams, #
                  random_seed: int = 0,
                  docker_containers: Dict[str, DockerContainer] = None) -> None:
         super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-        self._clf = MP(window_size=hyperparams['window_size'], step_size=hyperparams['step_size'])
+        self._clf = MP(window_size=hyperparams['window_size'], step_size=hyperparams['step_size'], contamination=hyperparams['contamination'])
 
     def set_training_data(self, *, inputs: Inputs) -> None:
         """
         Set training data for outlier detection.
         Args:
             inputs: Container DataFrame
         Returns:
             None
         """
         super().set_training_data(inputs=inputs)
 
     def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
         """
         Fit model with training data.
         Args:
             *: Container DataFrame. Time series data up to fit.
         Returns:
             None
         """
         return super().fit()
 
     def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
         """
         Process the testing data.
         Args:
             inputs: Container DataFrame. Time series data up to outlier detection.
         Returns:
             Container DataFrame
             1 marks Outliers, 0 marks normal.
         """
         return super().produce(inputs=inputs, timeout=timeout, iterations=iterations)
 
     def get_params(self) -> Params: # pragma: no cover
         """
         Return parameters.
         Args:
             None
         Returns:
             class Params
         """
         return super().get_params()
 
     def set_params(self, *, params: Params) -> None: # pragma: no cover
         """
         Set parameters for outlier detection.
         Args:
             params: class Params
         Returns:
             None
         """
         super().set_params(params=params)

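The rewritten MP class now subclasses CollectiveBaseDetector: fit() slices subsequences with get_sub_matrices, scores the series with stumpy's multidimensional matrix profile, stores decision_scores_, and lets _process_decision_scores() derive binary labels from the contamination rate. A small sketch of just the stumpy call at the center of that flow, on random data, with shapes per the Stumpy documentation linked above:

import numpy as np
import stumpy

# stumpy.mstump expects shape (d, n); the primitive gets there via
# X.transpose() on an (n_samples, n_features) input.
data = np.random.rand(3, 100)
m = 3  # window size, i.e. the window_size hyperparameter

matrix_profile, matrix_profile_indices = stumpy.mstump(data, m=m)

# One row per dimension, one column per length-m subsequence:
print(matrix_profile.shape)  # (3, 100 - 3 + 1) == (3, 98)
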
tods/detection_algorithm/UODBasePrimitive.py (+0, -6)

@@ -293,9 +293,7 @@ class UnsupervisedOutlierDetectorBase(TODSUnsupervisedLearnerPrimitiveBase[Input
         if self.hyperparams['use_semantic_types']:
             sk_inputs = inputs.iloc[:, self._training_indices]
         output_columns = []
-        #print("skinputs ", sk_inputs.values)
         if len(self._training_indices) > 0:
-
             if self.hyperparams['return_subseq_inds']:
 
                 if getattr(self._clf, 'left_inds_', None) is None or getattr(self._clf, 'right_inds_', None) is None: # point OD
@@ -306,9 +304,6 @@ class UnsupervisedOutlierDetectorBase(TODSUnsupervisedLearnerPrimitiveBase[Input
                 else:
                     pred_label, left_inds_, right_inds_ = self._clf.predict(sk_inputs.values)
 
-                # print(pred_label.shape, left_inds_.shape, right_inds_.shape)
-                # print(pred_label, left_inds_, right_inds_)
-
                 sk_output = numpy.concatenate((numpy.expand_dims(pred_label, axis=1),
                                                numpy.expand_dims(left_inds_, axis=1),
                                                numpy.expand_dims(right_inds_, axis=1)), axis=1)
@@ -321,7 +316,6 @@ class UnsupervisedOutlierDetectorBase(TODSUnsupervisedLearnerPrimitiveBase[Input
             else:
                 sk_output, _, _ = self._clf.predict(sk_inputs.values)
 
-        #print("sk output ", sk_output)
         if sparse.issparse(sk_output): # pragma: no cover
             sk_output = sk_output.toarray()

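With the debug prints gone, the remaining concatenate call is the whole story here: it stacks the predicted label and the subsequence index bounds into one (n, 3) output. A toy illustration of that shaping, using the file's own numpy spelling:

import numpy

pred_label = numpy.array([0, 1, 0])    # 1 marks outliers, 0 marks normal
left_inds_ = numpy.array([0, 1, 2])    # subsequence start indices
right_inds_ = numpy.array([3, 4, 5])   # subsequence end indices

# expand_dims turns each (n,) vector into an (n, 1) column so the three
# columns concatenate side by side into an (n, 3) array.
sk_output = numpy.concatenate((numpy.expand_dims(pred_label, axis=1),
                               numpy.expand_dims(left_inds_, axis=1),
                               numpy.expand_dims(right_inds_, axis=1)), axis=1)
print(sk_output)  # [[0 0 3], [1 1 4], [0 2 5]]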