From af76d18e21963853528d5900f8b086000310529e Mon Sep 17 00:00:00 2001 From: Devesh Kumar Date: Tue, 29 Sep 2020 07:12:14 -0500 Subject: [PATCH 1/2] System Wise detection added Former-commit-id: b0101f5db5043ccf8027cf31cf45b8ffe920129d [formerly 29c80e5ad8290d6a2329f04393fb1020cf000682] [formerly 20adaa7aebb1be5092f8308de1a158821270d556 [formerly 954235e92d2ae2f143d491d2603bda2ed50363c3]] [formerly be8de1f5cb0c7eb4a3c0cde516992296bf25a889 [formerly f04459aab8efbd7fcf90f9136c6a452cc74e55c9] [formerly af6a2417775056ede3438c90d504aa1c81886ce9 [formerly 11832ea941771f9171ea4a017766c100bce2c502]]] [formerly 76836c2e90f4af947fb58dff6b3da9a2a5d9bc8a [formerly 49d86e4e363afc6627db0e1a5bd89eea0947f7bc] [formerly 83647e8ffd04db38cb91ce0f1c26f225573b5590 [formerly b54a78f40c3ce51e070281ff1c0977fa7885981c]] [formerly e0d49339dea885f7df9edfcac392bb976e4a1540 [formerly 9729ac54bd0f24edd657a605c0f0a02f6a9ce52e] [formerly 30bc1b36d39d080db3a9c0d0d65455188aa20579 [formerly a79efa4e6d2b6ace46571dc2aa292e928d17d59f]]]] [formerly 1681476537c3937e0cb63a852e3d33e16e5168f5 [formerly dc6796c55cb55799625da4b457ad86fd50869c79] [formerly 70c2a56d6fae4628182a1a3fcac2ee989bd0dd26 [formerly 1c9c8e76089f525a6a6ad054d894389a4f06f1e8]] [formerly ba01035d8bef3caf9b72a037f927fc41a975d9b3 [formerly 217b51ab37b3d96b28f2515a20ff98d19a86def5] [formerly b7192505f01ab071b1a80e765e3e88a61f2615a9 [formerly fa243cd501807c062fa2d173dc77e3a8eb86741b]]] [formerly 3ccd933f5571022cf0498d5d4f5f128965544712 [formerly 5f9dc8539721f319a8f0e3fd059d76c52341308e] [formerly ca8b40852ed585ccdf47814f7a1da45faaec7905 [formerly 50f6cb845cb82aa818fff59eac42c88c5182cae5]] [formerly ec3635f261996b2bc6b1f45308894b0eb58d7d12 [formerly d2666795d0b4a3f5de7fe3dac6372a2d0c4e04cf] [formerly 1a06a32245ffed8e6374359810c65f77a4ce988f [formerly 173d1b30840841065840ee1389ba8355919a28aa]]]]] [formerly 6d860cf3736a5394332b6e621e29d413d2bb79ff [formerly c83b20a27b27d76dd4cbb2a06d51b913a4ddde79] [formerly 1bb62d904fc37561f3cfd5092692f4a73f9e8eec [formerly f812ef7fa4c71d29922802f5439ac549c315c092]] [formerly 0b70aa942c32bce1e24f0dc00259b3208bf80677 [formerly 01faeef027f593a8d67c465f818f3c881df934fe] [formerly e9f100c53825ad829b0bfa13bd9c2276b4c3b8f1 [formerly 73bfd33c6316a5383ad6f428804ba2d232d97788]]] [formerly dfa753e39859d0e96054549304dcb93ce4f6aa00 [formerly 81d9256d17b5732089e5e8e059f51c7baa3d468c] [formerly 315119e9ff1d2cb55a84504935e351876da107b9 [formerly 4d62d31d75e57b87fd66a9cb1aecdcf1002f5dd9]] [formerly a43db5fe93e13a1c2366ef91d8a87c0015c9424f [formerly 8fcf1401ef56d5a164dd1d900ae6c694f889e4d9] [formerly 44e0278b994b4f390af35a8aea0b3b40e98f81c7 [formerly b62aa327ba99c79023760bef4869762119d7a007]]]] [formerly 0727340c2ae47f7cfd928e49e96bd902ba330cb6 [formerly 2908762c9ed0ec418bcd3623a463139c868d5d05] [formerly 539d47dcfefed3ebe5b9a27b5832a4b0d3ea7684 [formerly 23886725f73e986f9b9d2e980ba5937225241bd9]] [formerly cb0236a7a719d58652bf87ffd39a6bffd23900ff [formerly 383ee073acbd05f3e3a3a985234168081954543f] [formerly 3ff61591888bc48d8cf63a45650e1cd813f1597f [formerly 49067bd3eda5dfdcd029286e0bcc869ee13c3128]]] [formerly 5243ae03dd133e3f1887cfae0e657049a6445edc [formerly 499398b844d89bca9bd7d437421b9c403286229e] [formerly 1c1d7c0b9602b59941d0653414f3798efa6f7ce8 [formerly b44a9d6b806598016477c790f94e3ada97c051eb]] [formerly db83075114e69ad9f470d52f4cf484a45d6b16e4 [formerly 9e9d8c9877f6881c4d7dc8b00d659756456d5cb8] [formerly 3357faf70d595315349a7b858212f1887cee697b [formerly 7fc9ac7050e327d157b5ab0e53923c9bbdcec14e]]]]]] Former-commit-id: a1b9d5fd9382c8fc1678ca0bb5cbc8ac912b507c [formerly d885f24004750f585bc5fe9ffda7e55320bc9fcb] [formerly 8aa0c6324f270a0a1658f3e22a153dcb75f9fb2c [formerly 323da8b84315195ccd9cad8efaea22e114af585e]] [formerly 1c098dfce8765510f6d38584efda66af09b8961e [formerly 2d70712b9d5a29d10a6786fd2b8471889d48142f] [formerly ddd288172f0676c3a5c76b068463678a8dd614a2 [formerly 6853a65d95c034d023119c2d2cff5a600b524e64]]] [formerly ef0875e7607b38058723197b4063792c3640e261 [formerly 6def45bf3c4f018f0f279a58f8a38cc96bb49bc8] [formerly 2161f1a1f645b4dd901e39fcad87300d1b8f1a4f [formerly f5fa6032fbab4d219cb126b0ab2beebf57792245]] [formerly 28ee6a45efabd73123ea81c9ea111871a095091b [formerly 3eaa3fd743f5e7d0b643999706110f67107b6faa] [formerly 157b7f651e6c31de234b58f90fb4e061225bdb6a [formerly 415dcb445126918fbc3e55f816c7d0b33400ee28]]]] [formerly e80d2baea814d7522f31ed755c85046701d7c3e7 [formerly af276f0e1726ab16b2cdaffad3e072bfa019da06] [formerly d16a3a9e3b4dcdde596799b3028e798e66a1826d [formerly ab6fc5f38fc652caab2295d4babb35c01c6c429e]] [formerly d1d0b8d53a04d5336327e30475427abb1fcdb08c [formerly 7b2397e0a6ee99008097e59925c5f411bc33fb0f] [formerly b3a08b8aece9ae649a80f593a1fd5da539e1b4da [formerly a22ce8f90fe0a1866730532b53840ead258769b1]]] [formerly 5b0c7bde636df3a52de476ab04cf4076dc516a9a [formerly 863d1d21047f5e0c37dc92dd96794eaf8ac97f3c] [formerly b280500e71785d3d5c4d98b2624abd81b2b6b183 [formerly 8a1e6280c6f7859a70a0e32beaf8199fe1d11c37]] [formerly aeb47189b9dc536e451cb03ce4107253e1806ed6 [formerly 11793fb2095ebc6e417ec5786b1ab6cc859ad238] [formerly 3357faf70d595315349a7b858212f1887cee697b]]]] Former-commit-id: 44f81dd1d87178b9ceb6f4174a03639c9d1cd95d [formerly a63fabe160699782de2444434d5bb3d7baabbef2] [formerly ff67ec2d258f578746c0d4fd8cbb9395e4b8bbf6 [formerly 1591a88714efe47a624b4d4d6d26531e035abe37]] [formerly 3bbe67e7f6aa44de3c9dda166f436b4cf7f203e9 [formerly b34a8d50f9e5f846591116d0be85cf12fa8106fd] [formerly bdf75c1cde5ef9809773112ffb0ac8496f182186 [formerly c0f2e2d83819346d23d357f51db80191da8c379e]]] [formerly f4c40e62d7c58c4a906da005425e46b63364fa75 [formerly 3d4a394a389d6dc273b1c8c2777f692c2ccdcf62] [formerly 34a11cb3fdd6e043c229d2ba13ab30beb4c7474c [formerly a48b745458c2bdf3435a1005e1318633991c529a]] [formerly 8ab86b4533da86bbd43d787d232a71421a35ed02 [formerly c5d1edfe84f0737bf86ce90997f25b88000a795b] [formerly 90967c4da75bf81f9ac655014f64179f1de244d4 [formerly ca862cc1d907bc8bcb44394b8abbb949c6593838]]]] Former-commit-id: e7a99f97aa86012154fd140085ba44db50cb7e79 [formerly 9efa17aa11e63e4b9f5f54ba1a1eaa2e665852de] [formerly 0504778d2cba8667b2249be2aa7a8f265f8bf08e [formerly 3ced73ed0c5b23711a0f4bfbbd9833479771a205]] [formerly 288987b7c07e07d197a28e67b9ab8a178ad06639 [formerly a3467d5a5ecca4e776a7582f5488c14ba30b1f00] [formerly c047ea98d7515dfc6eedca31047b3fe3c8c3b55a [formerly 0a29b28b43241a95e4d4f22fa8fd83cdace1ad64]]] Former-commit-id: 6ac75ca115e8c642529f07f1b6801700ca84143b [formerly 0e1aaa07fec0957834c5d3c79d3720202f3eb667] [formerly 392b1fcb10fa7e09a2e08d4848414a024115d2ed [formerly d36aac32fbaea0fa31d5b42bd4c7b0140759d8bf]] Former-commit-id: 088d9a0c399d6c132ec220636d26e8b27f1a8af7 [formerly c4f9f20b573369ad5e6242f461fedeba57db5491] Former-commit-id: 33d69cfffb8a894b26ff5eea30b67fb23b75aa28 --- examples/build_System_Wise_Detection_pipeline.py | 74 +++++ tods/detection_algorithm/SystemWiseDetection.py | 375 +++++++++++++++++++++++ tods/resources/.entry_points.ini | 3 + 3 files changed, 452 insertions(+) create mode 100644 examples/build_System_Wise_Detection_pipeline.py create mode 100644 tods/detection_algorithm/SystemWiseDetection.py diff --git a/examples/build_System_Wise_Detection_pipeline.py b/examples/build_System_Wise_Detection_pipeline.py new file mode 100644 index 0000000..aa5ea69 --- /dev/null +++ b/examples/build_System_Wise_Detection_pipeline.py @@ -0,0 +1,74 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# Step 3: extract_columns_by_semantic_types(targets) +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_3.add_output('produce') +step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +pipeline_description.add_step(step_3) + +attributes = 'steps.2.produce' +targets = 'steps.3.produce' + +# Step 4: auto encoder +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae')) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_4.add_output('produce_score') +#step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=[2]) +#step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +pipeline_description.add_step(step_4) + +# Step 5: ensemble +step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.system_wise_detection')) +step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce_score') +step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') + +step_5.add_output('produce') +pipeline_description.add_step(step_5) + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') + +# Output to YAML +#yaml = pipeline_description.to_yaml() +#with open('pipeline.yml', 'w') as f: +# f.write(yaml) +#prin(yaml) + +# Output to json +data = pipeline_description.to_json() +with open('example_pipeline.json', 'w') as f: + f.write(data) + print(data) diff --git a/tods/detection_algorithm/SystemWiseDetection.py b/tods/detection_algorithm/SystemWiseDetection.py new file mode 100644 index 0000000..3cb122d --- /dev/null +++ b/tods/detection_algorithm/SystemWiseDetection.py @@ -0,0 +1,375 @@ +import os +from typing import Any,Optional,List +import statsmodels.api as sm +import numpy as np +from d3m import container, utils as d3m_utils +from d3m import utils + +from numpy import ndarray +from collections import OrderedDict +from scipy import sparse +import os + +import numpy +import typing +import time + +from d3m import container +from d3m.primitive_interfaces import base, transformer + +from d3m.container import DataFrame as d3m_dataframe +from d3m.metadata import hyperparams, params, base as metadata_base + +from d3m.base import utils as base_utils +from d3m.exceptions import PrimitiveNotFittedError + +__all__ = ('SystemWiseDetectionPrimitive',) + +Inputs = container.DataFrame +Outputs = container.DataFrame + +class Params(params.Params): + #to-do : how to make params dynamic + use_column_names: Optional[Any] + + + +class Hyperparams(hyperparams.Hyperparams): + + #Tuning Parameter + #default -1 considers entire time series is considered + window_size = hyperparams.Hyperparameter(default=-1, semantic_types=[ + 'https://metadata.datadrivendiscovery.org/types/TuningParameter', + ], description="Window Size for decomposition") + + method_type = hyperparams.Enumeration( + values=['max', 'avg', 'majority_voting_sum'], + default='avg', + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="The type of method used to find anomalous system", + ) + contamination = hyperparams.Uniform( + lower=0., + upper=0.5, + default=0.1, + description='The amount of contamination of the data set, i.e. the proportion of outliers in the data set. ', + semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] + ) + + #control parameter + use_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", + ) + exclude_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", + ) + return_result = hyperparams.Enumeration( + values=['append', 'replace', 'new'], + default='new', + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", + ) + use_semantic_types = hyperparams.UniformBool( + default=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" + ) + add_index_columns = hyperparams.UniformBool( + default=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", + ) + error_on_no_input = hyperparams.UniformBool( + default=True, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", + ) + + return_semantic_type = hyperparams.Enumeration[str]( + values=['https://metadata.datadrivendiscovery.org/types/Attribute', + 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], + default='https://metadata.datadrivendiscovery.org/types/Attribute', + description='Decides what semantic type to attach to generated attributes', + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] + ) + + + +class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + Primitive to find abs_energy of time series + """ + + __author__ = "DATA Lab at Texas A&M University", + metadata = metadata_base.PrimitiveMetadata( + { + 'id': '3726fa29-28c5-4529-aec5-2f8b4ff2ef9e', + 'version': '0.1.0', + 'name': 'Sytem_Wise_Anomaly_Detection_Primitive', + 'python_path': 'd3m.primitives.tods.detection_algorithm.system_wise_detection', + 'keywords': ['Time Series','Anomalous System '], + "hyperparams_to_tune": ['window_size','method_type','contamination'], + 'source': { + 'name': 'DATA Lab at Texas A&M University', + 'uris': ['https://gitlab.com/lhenry15/tods.git','https://gitlab.com/lhenry15/tods/-/blob/devesh/tods/feature_analysis/StatisticalAbsEnergy.py'], + 'contact': 'mailto:khlai037@tamu.edu' + + }, + 'installation': [ + {'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/lhenry15/tods.git@{git_commit}#egg=TODS'.format( + git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), + ), + } + + ], + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.DATA_PROFILING, + ], + 'primitive_family': metadata_base.PrimitiveFamily.ANOMALY_DETECTION, + + } + ) + + def __init__(self, *, hyperparams: Hyperparams) -> None: + super().__init__(hyperparams=hyperparams) + self.primitiveNo = 0 + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + """ + + Args: + inputs: Container DataFrame + timeout: Default + iterations: Default + + Returns: + Container DataFrame containing abs_energy of time series + """ + + self.logger.info('System wise Detection Input Primitive called') + + + # Get cols to fit. + self._fitted = False + self._training_inputs, self._training_indices = self._get_columns_to_fit(inputs, self.hyperparams) + self._input_column_names = self._training_inputs.columns + + if len(self._training_indices) > 0: + # self._clf.fit(self._training_inputs) + self._fitted = True + else: + if self.hyperparams['error_on_no_input']: + raise RuntimeError("No input columns were selected") + self.logger.warn("No input columns were selected") + + if not self._fitted: + raise PrimitiveNotFittedError("Primitive not fitted.") + system_wise_detection_input = inputs + if self.hyperparams['use_semantic_types']: + system_wise_detection_input = inputs.iloc[:, self._training_indices] + output_columns = [] + if len(self._training_indices) > 0: + system_wise_detection_output = self._system_wise_detection(system_wise_detection_input,self.hyperparams["method_type"],self.hyperparams["window_size"],self.hyperparams["contamination"]) + outputs = system_wise_detection_output + + + if sparse.issparse(system_wise_detection_output): + system_wise_detection_output = system_wise_detection_output.toarray() + outputs = self._wrap_predictions(inputs, system_wise_detection_output) + + #if len(outputs.columns) == len(self._input_column_names): + # outputs.columns = self._input_column_names + + output_columns = [outputs] + + + else: + if self.hyperparams['error_on_no_input']: + raise RuntimeError("No input columns were selected") + self.logger.warn("No input columns were selected") + + + self.logger.info('System wise Detection Primitive returned') + outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], + add_index_columns=self.hyperparams['add_index_columns'], + inputs=inputs, column_indices=self._training_indices, + columns_list=output_columns) + return base.CallResult(outputs) + + @classmethod + def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): + """ + Select columns to fit. + Args: + inputs: Container DataFrame + hyperparams: d3m.metadata.hyperparams.Hyperparams + + Returns: + list + """ + if not hyperparams['use_semantic_types']: + return inputs, list(range(len(inputs.columns))) + + inputs_metadata = inputs.metadata + + def can_produce_column(column_index: int) -> bool: + return cls._can_produce_column(inputs_metadata, column_index, hyperparams) + + use_columns = hyperparams['use_columns'] + exclude_columns = hyperparams['exclude_columns'] + + columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, + use_columns=use_columns, + exclude_columns=exclude_columns, + can_use_column=can_produce_column) + return inputs.iloc[:, columns_to_produce], columns_to_produce + # return columns_to_produce + + @classmethod + def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, + hyperparams: Hyperparams) -> bool: + """ + Output whether a column can be processed. + Args: + inputs_metadata: d3m.metadata.base.DataMetadata + column_index: int + + Returns: + bool + """ + column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) + + accepted_structural_types = (int, float, numpy.integer, numpy.float64) + accepted_semantic_types = set() + accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") + if not issubclass(column_metadata['structural_type'], accepted_structural_types): + return False + + semantic_types = set(column_metadata.get('semantic_types', [])) + return True + if len(semantic_types) == 0: + cls.logger.warning("No semantic types found in column metadata") + return False + + # Making sure all accepted_semantic_types are available in semantic_types + if len(accepted_semantic_types - semantic_types) == 0: + return True + + return False + + @classmethod + def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], + target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: + """ + Updata metadata for selected columns. + Args: + inputs_metadata: metadata_base.DataMetadata + outputs: Container Dataframe + target_columns_metadata: list + + Returns: + d3m.metadata.base.DataMetadata + """ + outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) + + for column_index, column_metadata in enumerate(target_columns_metadata): + column_metadata.pop("structural_type", None) + outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) + + return outputs_metadata + + def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: + """ + Wrap predictions into dataframe + Args: + inputs: Container Dataframe + predictions: array-like data (n_samples, n_features) + + Returns: + Dataframe + """ + outputs = d3m_dataframe(predictions, generate_metadata=True) + target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams,self.primitiveNo) + outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) + + return outputs + + @classmethod + def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams, primitiveNo): + """ + Add target columns metadata + Args: + outputs_metadata: metadata.base.DataMetadata + hyperparams: d3m.metadata.hyperparams.Hyperparams + + Returns: + List[OrderedDict] + """ + outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] + target_columns_metadata: List[OrderedDict] = [] + for column_index in range(outputs_length): + column_name = "{0}{1}_{2}".format(cls.metadata.query()['name'], primitiveNo, column_index) + column_metadata = OrderedDict() + semantic_types = set() + semantic_types.add(hyperparams["return_semantic_type"]) + column_metadata['semantic_types'] = list(semantic_types) + + column_metadata["name"] = str(column_name) + target_columns_metadata.append(column_metadata) + + return target_columns_metadata + + def _write(self, inputs: Inputs): + inputs.to_csv(str(time.time()) + '.csv') + + + + + def _system_wise_detection(self,X,method_type,window_size,contamination): + systemIds = X.system_id.unique() + groupedX = X.groupby(X.system_id) + + transformed_X = [] + if(method_type=="max"): + maxOutlierScorePerSystemList = [] + for systemId in systemIds: + systemDf = groupedX.get_group(systemId) + maxOutlierScorePerSystemList.append(np.max(systemDf["value_0"].values)) + + ranking = np.sort(maxOutlierScorePerSystemList) + threshold = ranking[int((1 - contamination) * len(ranking))] + self.threshold = threshold + mask = (maxOutlierScorePerSystemList >= threshold) + ranking[mask] = 1 + ranking[np.logical_not(mask)] = 0 + for iter in range(len(systemIds)): + transformed_X.append([systemIds[iter],ranking[iter]]) + + if (method_type == "avg"): + maxOutlierScorePerSystemList = [] + for systemId in systemIds: + systemDf = groupedX.get_group(systemId) + maxOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values))) + + ranking = np.sort(maxOutlierScorePerSystemList) + threshold = ranking[int((1 - contamination) * len(ranking))] + self.threshold = threshold + mask = (maxOutlierScorePerSystemList >= threshold) + ranking[mask] = 1 + ranking[np.logical_not(mask)] = 0 + for iter in range(len(systemIds)): + transformed_X.append([systemIds[iter], ranking[iter]]) + + + return transformed_X + + + + diff --git a/tods/resources/.entry_points.ini b/tods/resources/.entry_points.ini index a3767f2..f51c82a 100644 --- a/tods/resources/.entry_points.ini +++ b/tods/resources/.entry_points.ini @@ -79,3 +79,6 @@ tods.detection_algorithm.telemanom = tods.detection_algorithm.Telemanom:Telemano tods.detection_algorithm.Ensemble = tods.detection_algorithm.Ensemble:Ensemble tods.reinforcement.rule_filter = tods.reinforcement.RuleBasedFilter:RuleBasedFilter + + +tods.detection_algorithm.system_wise_detection = tods.detection_algorithm.SystemWiseDetection:SystemWiseDetectionPrimitive \ No newline at end of file From c36004451805cf65e7aa2d2c0d828df84a0e1808 Mon Sep 17 00:00:00 2001 From: Devesh Kumar Date: Tue, 29 Sep 2020 08:12:39 -0500 Subject: [PATCH 2/2] System Wise detection added Former-commit-id: e4302b53012091459b225210d586ba093e837480 [formerly 4aaa7e334e3670203277fde693162e92a4cf9d35] [formerly 6f539539faa232b1f6c1a539c2fc0636ad31058d [formerly f88384aa0a8126b80882c17a24ae14d61025c876]] [formerly 9d6ef41d61d601e97f10c4e76d29120cecf65efd [formerly af05d0388a4383f969a72fdd1d97733e2a8eff77] [formerly 98a735f03051db87738b6182e40ba1250fce07fe [formerly 4d7c25c4286af60d41c3ef2d973894c57019e521]]] [formerly 83fb6a49f3c2b8c242b764fbb3951ac7ba148926 [formerly a8357e514ed743af0fd3a179b7fcdb65192bf4a9] [formerly 8d2001a47aeb430f1d1d4893e8202da809bd32cd [formerly 4da78f5507915fb21d44d060b411b53f4496b54e]] [formerly 2e5ca864e36caa4bf63d71272cb6a104fcf96bde [formerly 15d36e9ea0f6fcb203f8b5a8ebf50aabf5d8f33b] [formerly 9f3c47c7b1489ac97ef329b18f3d1142107417bc [formerly cbfccef799011cecbb1593a7f10c8425254366ed]]]] [formerly 6eaafdda6d0f87ec3f9c6bdd400d431212f81caf [formerly 5943f5a18d08b2ef584f95ca077a5f40ed74fd2c] [formerly 4a72e151d4751d18dd414c735d2b93adf82cc8b0 [formerly dea240cb7cf33f99d5316f7ab6c7faec9667b0eb]] [formerly 832b5a2819689cafdf14c3009b8be5b5fc6fed70 [formerly b59d6be50aa90336a420323b092596d625f525ec] [formerly b6e28bbdf39ce981c92dc3118cb476226bb1623f [formerly 4d2c50b21206b52aaa993e10ca7277c0990532f5]]] [formerly e4b2e4ad8f7ac6c516dc3a300661411659bb2694 [formerly dcb324a2027077657b6fa653ac559a8ffcd504b0] [formerly 0d3d65d68b064f119f59620c7b15c64b2046a44a [formerly 7cd9cec3f0d1d3f8283386e9b7a97ee2f86dcfb3]] [formerly 5ea92c8534a1cc4f546d7f61acf3b0fd45dc3c73 [formerly 77f44ab71678f1fcec159e641f861e48906bd23a] [formerly 4b866c4ce539335b39a4b7c55589be1fa8f1737f [formerly 53a6a05c6fa8af9375374da99d4dd870e2d03edd]]]]] [formerly e309d0478ef1df407339c1e95df86e8cf0e30d63 [formerly 5e7023951a6bae3900bc52e078d7e2f7b68de4db] [formerly 5211ed45f021801b4eccb943dd7c275b9ae7be0a [formerly f95a8296075f1c1af00a821094e64b3460499190]] [formerly fcfeed2bad85c1af622dc838dd23f194a98a2873 [formerly 86004bdf7f6a0de1171dd3206a129827fd86184e] [formerly ec61732826c6334943b19a4cba3065569cbebeb5 [formerly a1d353f7fcc0ffb9ec7b4b34ff1737ec54b6925b]]] [formerly 2c7e5539438d15a3d86416bab4dd41cec502bc64 [formerly 4d69729004cf788bcf43c884cc93042125f687b8] [formerly ccb35644eb05984c55c4f421de8490e2627984da [formerly 5f7775ed4285ffe3e66a155e727bc76db6e03c0d]] [formerly 5d250477e36092648e51df1b606f4c808dc2a77b [formerly d169bd2ad41b8c8f7f4e819ac5a840cb614e1a73] [formerly 0d0c9f6b03057eb52accb2db6e9e3823839f021d [formerly 5487a767bf398bb866590d7c53feb2ce256ce27f]]]] [formerly 79af600ff94dbc2cb149aea473ea09882439db00 [formerly f14c51011228ed0e4ef21e89900b1ca8390c3d8d] [formerly 258d1abf33cb88fe1b4fce32bbab9ac19fbcc03a [formerly 5928649d9155ef83a217c063de46dc079aa17c79]] [formerly 59c329f4b01ab8384185fd694dbaa2cda616eded [formerly 77391acc479ca7346b17bb31ef9c7957da4eb72c] [formerly de7b6992b77cdbe74882588043eff754f0d19102 [formerly 77d5517d4215c006abe2d1370463cfa983478ff9]]] [formerly 5cdaaecda44c680e2a8a5444191a27f66e47f7da [formerly 077c1200fa0d943c144d0e330efe9ae69b63be45] [formerly 6b3e082e3fc0d85d6786b25408e068b8389fe3d7 [formerly 6d547b0ea9620bc3b483c7c677a3e365d40cb8e3]] [formerly 7d4e1f6bd549ba644fa0881a015f29ba20317b23 [formerly 3f9f8bf1e98d096814fe40207b07a16d8bc050a4] [formerly 3ab33e4e0a225de440dfb5acb2a8a1e1746e7345 [formerly 15c823899c80e0f56762f36037c061649cb95d18]]]]]] Former-commit-id: 7d581a11ae023631ac1cf46034f25c1dab12a958 [formerly 8e7111350dd185b524cc3054a3019ee107fea0e6] [formerly e470c42ba3e5387bc62170d506d4d2c1140604af [formerly 04e47379117cca2eac6f0dda8e12ec8133d461e0]] [formerly 9492392d985595878306401aff9084158daa941f [formerly f7baa50dacafe68372e093799dbe4be1da7638e5] [formerly ea939f8031b667fa7a498c740ea85cf9506c9fd7 [formerly 499e5145084a9359c29cd29f16e230a89bc779b7]]] [formerly 9e31983ac792b01db00e28f52d25795be6a6675b [formerly 50823179fca9f453893364acb3f32891c0f82acd] [formerly 8e53f8ffb30f851954cc2d77c27fb05eb2a6e828 [formerly 1b79318b09503dc74c51f34a0a097f3b5f24eeff]] [formerly 5057c32d67ede6c06bf1b27105da611d21b9fe1c [formerly 8c89e00fc70a46ceb758984904e5b5cf994e9257] [formerly f7ac6d7a450e873b6816411a307f80517f19b43c [formerly 44e943805dc5b3ae29b75c31753f9075d455cf60]]]] [formerly e63ff23c4b761af8884cf0a39b3b2dd121baf947 [formerly d769223bc370b065a8fd694e2d8c727ca08ce142] [formerly e560da815613e0ad438c334ea9cf5fb6f2fcd515 [formerly c493de91c119d4ddc5d911d43acf2deaf8178f94]] [formerly a91671bb4490b9db3bc8beb1156e0db62654bb88 [formerly a19cc66d9978428972e648655be6bc6db9752daa] [formerly a61ccd69958257e351d10665b5d087476291e0d8 [formerly f738fbeca6592684e22b21c22614e3f3c82f1e7a]]] [formerly 163b1d74e1265eea978861789d95184194b84db0 [formerly ebbf736e533fcaf98ef2184797cbd566a891fdad] [formerly 4f973dd70a339853fbbb9c3de946917b5f7cce84 [formerly 7ecc4fb1d5073c63b8ab04a57d7a039ffcd6d903]] [formerly 5f369aa799b1bc0252ea17361e7664b3f136fa26 [formerly 7ba6e4367d74f807a75ae61221895502e8800618] [formerly 3ab33e4e0a225de440dfb5acb2a8a1e1746e7345]]]] Former-commit-id: 4bf5312685ac1e81a43d78699ea906e9e9e6af71 [formerly ed91c6d8a1a0c6eb5d5c0f0b1207f2a83bc7c4ff] [formerly 3ea0046108dc66ef66a66540d38b586d719e129a [formerly 84bfff70217c36dc6bf65981bc2e77681588bc69]] [formerly 520135ee9765a414470af3fade617f92ef16d0a7 [formerly 186c07386d43be2b52d2ed66459b1ea22f8f5a76] [formerly 20a947321569ab2f8d0c1e97f2d23131f8d1d77c [formerly de476ee28c8908cd872766fce04fc8824358c9c3]]] [formerly 4130e0a401904d12774831a1b25cdad09e29a322 [formerly 1c82429fa596438094969643105328d88fc06e76] [formerly 2a8a9e3be83297f4eb9cab1c63a40e152ee75b93 [formerly 0120401179eb347d2ae1d397d69e87cb943a7e08]] [formerly 10d866fe7177973257b90753d2350b6ccb7c34a3 [formerly 0a7d02b841e47dbe455b5a61f6edfd337ebcd57b] [formerly fc8c90a9376a73e18cd104c42d22261908663063 [formerly 29c5b1f03d7eb42dc0f3bc17f78951e1cdad818a]]]] Former-commit-id: 684b9d1af033eec77c562fb9cadf72efd2ffb366 [formerly 99ed1e909ede98259f00b68ad6da9c5abf1aab64] [formerly e7b2f2759cfb65bc64b912141acbf7375e4a3251 [formerly 09261016819cf039b6cff3185d6fc547eba7aa46]] [formerly 9c854f8bf887f1b87babf71ab550184a00a74ee5 [formerly a90c5f05a1edce6527e9dd1103b36549893f4dc4] [formerly 77b126bf28439c1aca1444d7c19136bffcb2e2b5 [formerly 6a7e79785726dd1ce2fd733b15c732cd74fea4b0]]] Former-commit-id: 48133573d756ad5ef6d81316f7c79db9e884cfcf [formerly 50a7456017ecd50481ea6945211b00180150039c] [formerly b28a5351535dfaae12a3439b9e8ddc3344b2bec5 [formerly e0d06b8061e7de8b0c4e79a79c201cc873695534]] Former-commit-id: 679a59d059f47623a7c649332569af47b285e6e5 [formerly e4bb5749072c6b19592c0eca750d46c41fb37126] Former-commit-id: 3022920ffe453a5433d74c13cc039de9c095d919 --- tods/detection_algorithm/SystemWiseDetection.py | 102 ++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 6 deletions(-) diff --git a/tods/detection_algorithm/SystemWiseDetection.py b/tods/detection_algorithm/SystemWiseDetection.py index 3cb122d..d505522 100644 --- a/tods/detection_algorithm/SystemWiseDetection.py +++ b/tods/detection_algorithm/SystemWiseDetection.py @@ -38,13 +38,13 @@ class Hyperparams(hyperparams.Hyperparams): #Tuning Parameter #default -1 considers entire time series is considered - window_size = hyperparams.Hyperparameter(default=-1, semantic_types=[ + window_size = hyperparams.Hyperparameter(default=10, semantic_types=[ 'https://metadata.datadrivendiscovery.org/types/TuningParameter', ], description="Window Size for decomposition") method_type = hyperparams.Enumeration( - values=['max', 'avg', 'majority_voting_sum'], - default='avg', + values=['max', 'avg', 'sliding_window_sum','majority_voting_sliding_window_sum','majority_voting_sliding_window_max'], + default='majority_voting_sliding_window_max', semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], description="The type of method used to find anomalous system", ) @@ -338,10 +338,13 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, transformed_X = [] if(method_type=="max"): + """ + Sytems are sorted based on maximum of reconstruction errors" + """ maxOutlierScorePerSystemList = [] for systemId in systemIds: systemDf = groupedX.get_group(systemId) - maxOutlierScorePerSystemList.append(np.max(systemDf["value_0"].values)) + maxOutlierScorePerSystemList.append(np.max(np.abs(systemDf["value_0"].values))) ranking = np.sort(maxOutlierScorePerSystemList) threshold = ranking[int((1 - contamination) * len(ranking))] @@ -353,10 +356,40 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, transformed_X.append([systemIds[iter],ranking[iter]]) if (method_type == "avg"): - maxOutlierScorePerSystemList = [] + """ + Sytems are sorted based on average of reconstruction errors" + """ + avgOutlierScorePerSystemList = [] + for systemId in systemIds: + systemDf = groupedX.get_group(systemId) + avgOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values))) + + ranking = np.sort(avgOutlierScorePerSystemList) + threshold = ranking[int((1 - contamination) * len(ranking))] + self.threshold = threshold + mask = (avgOutlierScorePerSystemList >= threshold) + ranking[mask] = 1 + ranking[np.logical_not(mask)] = 0 + for iter in range(len(systemIds)): + transformed_X.append([systemIds[iter], ranking[iter]]) + + if (method_type == "sliding_window_sum"): + """ + Sytems are sorted based on max of max of reconstruction errors in each window" + """ + OutlierScorePerSystemList = [] for systemId in systemIds: systemDf = groupedX.get_group(systemId) - maxOutlierScorePerSystemList.append(np.mean(np.abs(systemDf["value_0"].values))) + column_value = systemDf["value_0"].values + column_score = np.zeros(len(column_value)) + for iter in range(window_size - 1, len(column_value)): + sequence = column_value[iter - window_size + 1:iter + 1] + column_score[iter] = np.sum(np.abs(sequence)) + column_score[:window_size - 1] = column_score[window_size - 1] + OutlierScorePerSystemList.append(column_score.tolist()) + OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) + + maxOutlierScorePerSystemList = OutlierScorePerSystemList.max(axis=1).tolist() ranking = np.sort(maxOutlierScorePerSystemList) threshold = ranking[int((1 - contamination) * len(ranking))] @@ -367,6 +400,63 @@ class SystemWiseDetectionPrimitive(transformer.TransformerPrimitiveBase[Inputs, for iter in range(len(systemIds)): transformed_X.append([systemIds[iter], ranking[iter]]) + if (method_type == "majority_voting_sliding_window_sum"): + """ + Sytem with most vote based on max of sum of reconstruction errors in each window + """ + OutlierScorePerSystemList = [] + for systemId in systemIds: + systemDf = groupedX.get_group(systemId) + column_value = systemDf["value_0"].values + column_score = np.zeros(len(column_value)) + for iter in range(window_size - 1, len(column_value)): + sequence = column_value[iter - window_size + 1:iter + 1] + column_score[iter] = np.sum(np.abs(sequence)) + column_score[:window_size - 1] = column_score[window_size - 1] + OutlierScorePerSystemList.append(column_score.tolist()) + OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) + OutlierScorePerSystemList = ( + OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) + + maxOutlierScorePerSystemList = OutlierScorePerSystemList.sum(axis=1).tolist() + + ranking = np.sort(maxOutlierScorePerSystemList) + threshold = ranking[int((1 - contamination) * len(ranking))] + self.threshold = threshold + mask = (maxOutlierScorePerSystemList >= threshold) + ranking[mask] = 1 + ranking[np.logical_not(mask)] = 0 + for iter in range(len(systemIds)): + transformed_X.append([systemIds[iter], ranking[iter]]) + + if (method_type == "majority_voting_sliding_window_max"): + """ + Sytem with most vote based on max of max of reconstruction errors in each window + """ + OutlierScorePerSystemList = [] + for systemId in systemIds: + systemDf = groupedX.get_group(systemId) + column_value = systemDf["value_0"].values + column_score = np.zeros(len(column_value)) + for iter in range(window_size - 1, len(column_value)): + sequence = column_value[iter - window_size + 1:iter + 1] + column_score[iter] = np.max(np.abs(sequence)) + column_score[:window_size - 1] = column_score[window_size - 1] + OutlierScorePerSystemList.append(column_score.tolist()) + OutlierScorePerSystemList = np.asarray(OutlierScorePerSystemList) + OutlierScorePerSystemList = ( + OutlierScorePerSystemList == OutlierScorePerSystemList.max(axis=0)[None, :]).astype(int) + + maxOutlierScorePerSystemList = OutlierScorePerSystemList.sum(axis=1).tolist() + + ranking = np.sort(maxOutlierScorePerSystemList) + threshold = ranking[int((1 - contamination) * len(ranking))] + self.threshold = threshold + mask = (maxOutlierScorePerSystemList >= threshold) + ranking[mask] = 1 + ranking[np.logical_not(mask)] = 0 + for iter in range(len(systemIds)): + transformed_X.append([systemIds[iter], ranking[iter]]) return transformed_X