From 39892aabb6b7071699af648656cca7a454288cfd Mon Sep 17 00:00:00 2001
From: lhenry15
Date: Thu, 10 Sep 2020 01:26:28 -0500
Subject: [PATCH] change name of tods.tods to tods.searcher
---
 tods/searcher/__init__.py                     |   0
 tods/searcher/resources/default_pipeline.json |   1 +
 tods/searcher/schemas.py                      |  10 +
 tods/searcher/search/__init__.py              |   1 +
 tods/searcher/search/brute_force_search.py    | 292 ++++++++++++++++++++++++++
 tods/searcher/tods/utils.py                   |  59 ++++++
 tods/searcher/utils.py                        |  51 +++++
 7 files changed, 414 insertions(+)
 create mode 100644 tods/searcher/__init__.py
 create mode 100644 tods/searcher/resources/default_pipeline.json
 create mode 100644 tods/searcher/schemas.py
 create mode 100644 tods/searcher/search/__init__.py
 create mode 100644 tods/searcher/search/brute_force_search.py
 create mode 100644 tods/searcher/tods/utils.py
 create mode 100644 tods/searcher/utils.py

diff --git a/tods/searcher/__init__.py b/tods/searcher/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tods/searcher/resources/default_pipeline.json b/tods/searcher/resources/default_pipeline.json
new file mode 100644
index 0000000..bff2c98
--- /dev/null
+++ b/tods/searcher/resources/default_pipeline.json
@@ -0,0 +1 @@
+{"id": "384bbfab-4f6d-4001-9f90-684ea5681f5d", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-09-09T23:40:01.756164Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.7.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "b94ee59ccf8db678d506adddbc238fb2049fb664a1e3f3f3f6a6517c0c4f8e5f"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.data_transformation.column_parser.Common", "name": "Parses strings into their types", "digest": "256f0155c7185d747b3b23096e46c40d15844106f9ed6346453f6010891f1896"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "85fe81066e85dbb62eacbe8a96be52d08e7aec22a025a29c81feaaaa72d7f7d0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "85fe81066e85dbb62eacbe8a96be52d08e7aec22a025a29c81feaaaa72d7f7d0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "eaff2f35-978c-4530-a12e-061a5f0beacd", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_mean", "name": "Time Series Decompostional", "digest": "2f2a8c07878643fe29c346096b91b5ba91477baa1e7e78684f07e53d29766ca4"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_vae", "name": "TODS.anomaly_detection_primitives.VariationalAutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", "name": "Construct pipeline predictions output", "digest": "d5384857f75090844f367504befb1a854e5088589f6aae0795f66ccf10403e19"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "8d969800816d9596e94cb045aacce43dc3d49e8c5bedb403e35af6c9b8339990"}
diff --git a/tods/searcher/schemas.py b/tods/searcher/schemas.py
new file mode 100644
index 0000000..02d76ad
--- /dev/null
+++ b/tods/searcher/schemas.py
@@ -0,0 +1,10 @@
+import os
+
+resource_dir = os.path.dirname(__file__)
+
+DEFAULT_PIPELINE_DIR = os.path.join(resource_dir, 'resources', 'default_pipeline.json')
+
+def load_default_pipeline():
+    from axolotl.utils import pipeline as pipeline_utils
+    pipeline = pipeline_utils.load_pipeline(DEFAULT_PIPELINE_DIR)
+    return pipeline
diff --git a/tods/searcher/search/__init__.py b/tods/searcher/search/__init__.py
new file mode 100644
index 0000000..179c117
--- /dev/null
+++ b/tods/searcher/search/__init__.py
@@ -0,0 +1 @@
+from .brute_force_search import BruteForceSearch
diff --git a/tods/searcher/search/brute_force_search.py b/tods/searcher/search/brute_force_search.py
new file mode 100644
index 0000000..ac875e1
--- /dev/null
+++ b/tods/searcher/search/brute_force_search.py
@@ -0,0 +1,292 @@
+# A brute-force search over candidate pipelines
+import uuid
+import random
+
+from d3m.metadata.pipeline import Pipeline
+
+from axolotl.algorithms.base import PipelineSearchBase
+from axolotl.utils import schemas as schemas_utils
+
+class BruteForceSearch(PipelineSearchBase):
+    def __init__(self, problem_description, backend, *, primitives_blocklist=None, ranking_function=None):
+        super().__init__(problem_description=problem_description, backend=backend,
+                         primitives_blocklist=primitives_blocklist, ranking_function=ranking_function)
+        if self.ranking_function is None:
+            self.ranking_function = _rank_first_metric
+
+        # Find the candidate pipelines
+        self.task_description = schemas_utils.get_task_description(self.problem_description['problem']['task_keywords'])
+        self.available_pipelines = self._return_pipelines(
+            self.task_description['task_type'], self.task_description['task_subtype'], self.task_description['data_types'])
+
+        self.metrics = self.problem_description['problem']['performance_metrics']
+        self.data_preparation_pipeline = _generate_data_preparation_pipeline()
+        self.scoring_pipeline = _generate_scoring_pipeline()
+        self.data_preparation_params = _generate_data_preparation_params()
+
+        self.current_pipeline_index = 0
+        self.offset = 1
+
+    def evaluate(self, pipeline_to_eval, input_data=None):
+        if input_data is None:
+            input_data = self.input_data
+        pipeline_result = self.backend.evaluate_pipeline(
+            problem_description=self.problem_description,
+            pipeline=pipeline_to_eval,
+            input_data=input_data,
+            metrics=self.metrics,
+            data_preparation_pipeline=self.data_preparation_pipeline,
+            scoring_pipeline=self.scoring_pipeline,
+            data_preparation_params=self.data_preparation_params)
+
+        return pipeline_result
+
+    def _search(self, time_left):
+        # Take the next batch of candidate pipelines to be evaluated
+        pipelines_to_eval = self.available_pipelines[self.current_pipeline_index: self.current_pipeline_index + self.offset]
+        self.current_pipeline_index += 1
+
+        pipeline_results = self.backend.evaluate_pipelines(
+            problem_description=self.problem_description,
+            pipelines=pipelines_to_eval,
+            input_data=self.input_data,
+            metrics=self.metrics,
+            data_preparation_pipeline=self.data_preparation_pipeline,
+            scoring_pipeline=self.scoring_pipeline,
+            data_preparation_params=self.data_preparation_params)
+
+        # DEBUG: surface evaluation errors without aborting the search
+        ####################
+        for pipeline_result in pipeline_results:
+            try:
+                for error in pipeline_result.error:
+                    if error is not None:
+                        raise error
+            except Exception:
+                import traceback
+                traceback.print_exc()
+        ####################
+
+        return [self.ranking_function(pipeline_result) for pipeline_result in pipeline_results]
+
+    def _return_pipelines(self, task_type, task_subtype, data_type):
+        # The candidates are currently task-agnostic; the task arguments are unused.
+        pipeline_candidates = _generate_pipelines(primitive_python_paths)
+        return pipeline_candidates
+
+primitive_python_paths = {
+    'data_processing': [
+        #'d3m.primitives.tods.data_processing.time_interval_transform',
+        #'d3m.primitives.tods.data_processing.categorical_to_binary',
+        'd3m.primitives.tods.data_processing.column_filter',
+        #'d3m.primitives.tods.data_processing.timestamp_validation',
+        #'d3m.primitives.tods.data_processing.duplication_validation',
+        #'d3m.primitives.tods.data_processing.continuity_validation',
+    ],
+    'timeseries_processing': [
+        'd3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler',
+        'd3m.primitives.tods.timeseries_processing.transformation.standard_scaler',
+        'd3m.primitives.tods.timeseries_processing.transformation.power_transformer',
+        'd3m.primitives.tods.timeseries_processing.transformation.quantile_transformer',
+        'd3m.primitives.tods.timeseries_processing.transformation.moving_average_transform',
+        'd3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing',
+        #'d3m.primitives.tods.timeseries_processing.transformation.holt_smoothing',
+        #'d3m.primitives.tods.timeseries_processing.transformation.holt_winters_exponential_smoothing',
+        #'d3m.primitives.tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition',
+    ],
+    'feature_analysis': [
+        #'d3m.primitives.tods.feature_analysis.auto_correlation',
+        'd3m.primitives.tods.feature_analysis.statistical_mean',
+        'd3m.primitives.tods.feature_analysis.statistical_median',
+        'd3m.primitives.tods.feature_analysis.statistical_g_mean',
+        'd3m.primitives.tods.feature_analysis.statistical_abs_energy',
+        'd3m.primitives.tods.feature_analysis.statistical_abs_sum',
+        'd3m.primitives.tods.feature_analysis.statistical_h_mean',
+        'd3m.primitives.tods.feature_analysis.statistical_maximum',
+        #'d3m.primitives.tods.feature_analysis.statistical_minimum',
+        #'d3m.primitives.tods.feature_analysis.statistical_mean_abs',
+        #'d3m.primitives.tods.feature_analysis.statistical_mean_abs_temporal_derivative',
+        #'d3m.primitives.tods.feature_analysis.statistical_mean_temporal_derivative',
+        #'d3m.primitives.tods.feature_analysis.statistical_median_abs_deviation',
+        #'d3m.primitives.tods.feature_analysis.statistical_kurtosis',
+        #'d3m.primitives.tods.feature_analysis.statistical_skew',
+        #'d3m.primitives.tods.feature_analysis.statistical_std',
+        #'d3m.primitives.tods.feature_analysis.statistical_var',
+        #'d3m.primitives.tods.feature_analysis.statistical_variation',
+        #'d3m.primitives.tods.feature_analysis.statistical_vec_sum',
+        #'d3m.primitives.tods.feature_analysis.statistical_willison_amplitude',
+        #'d3m.primitives.tods.feature_analysis.statistical_zero_crossing',
+        #'d3m.primitives.tods.feature_analysis.spectral_residual_transform',
+        #'d3m.primitives.tods.feature_analysis.fast_fourier_transform',
+        #'d3m.primitives.tods.feature_analysis.discrete_cosine_transform',
+        #'d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization',
+        #'d3m.primitives.tods.feature_analysis.bk_filter',
+        #'d3m.primitives.tods.feature_analysis.hp_filter',
+        #'d3m.primitives.tods.feature_analysis.truncated_svd',
+        #'d3m.primitives.tods.feature_analysis.wavelet_transform',
+        #'d3m.primitives.tods.feature_analysis.trmf',
+    ],
+    'detection_algorithm': [
+        'd3m.primitives.tods.detection_algorithm.pyod_ae',
+        'd3m.primitives.tods.detection_algorithm.pyod_vae',
+        'd3m.primitives.tods.detection_algorithm.pyod_cof',
+        'd3m.primitives.tods.detection_algorithm.pyod_sod',
+        'd3m.primitives.tods.detection_algorithm.pyod_abod',
+        'd3m.primitives.tods.detection_algorithm.pyod_hbos',
+        'd3m.primitives.tods.detection_algorithm.pyod_iforest',
+        #'d3m.primitives.tods.detection_algorithm.pyod_lof',
+        #'d3m.primitives.tods.detection_algorithm.pyod_knn',
+        #'d3m.primitives.tods.detection_algorithm.pyod_ocsvm',
+        #'d3m.primitives.tods.detection_algorithm.pyod_loda',
+        #'d3m.primitives.tods.detection_algorithm.pyod_cblof',
+        #'d3m.primitives.tods.detection_algorithm.pyod_sogaal',
+        #'d3m.primitives.tods.detection_algorithm.pyod_mogaal',
+        #'d3m.primitives.tods.detection_algorithm.matrix_profile',
+        #'d3m.primitives.tods.detection_algorithm.AutoRegODetector',
+        #'d3m.primitives.tods.detection_algorithm.LSTMODetector',
+        #'d3m.primitives.tods.detection_algorithm.PCAODetector',
+        #'d3m.primitives.tods.detection_algorithm.KDiscordODetector',
+        #'d3m.primitives.tods.detection_algorithm.deeplog',
+        #'d3m.primitives.tods.detection_algorithm.telemanom',
+    ]
+}
+
+
+def _rank_first_metric(pipeline_result):
+    # Rank by the first performance metric; a lower rank is better.
+    if pipeline_result.status == 'COMPLETED':
+        scores = pipeline_result.scores
+        pipeline_result.rank = -scores['value'][0]
+        return pipeline_result
+    else:
+        # The evaluation errored out; push the pipeline to the bottom.
+        pipeline_result.rank = 1
+        return pipeline_result
+
+def _generate_data_preparation_params():
+    from axolotl.utils import schemas as schemas_utils
+    data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split']
+    return data_preparation_params
+
+def _generate_scoring_pipeline():
+    from axolotl.utils import schemas as schemas_utils
+    scoring_pipeline = schemas_utils.get_scoring_pipeline()
+    return scoring_pipeline
+
+def _generate_data_preparation_pipeline():
+    from axolotl.utils import schemas as schemas_utils
+    data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA")
+    return data_preparation_pipeline
+
+def _generate_pipeline(combinations):
+    from d3m import index
+    from d3m.metadata.base import ArgumentType
+    from d3m.metadata.pipeline import Pipeline, PrimitiveStep
+
+    pipelines = []
+    for combination in combinations:
+        # Creating pipeline
+        pipeline_description = Pipeline()
+        pipeline_description.add_input(name='inputs')
+
+        # The first four steps are fixed
+        # Step 0: dataset_to_dataframe
+        step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
+        step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
+        step_0.add_output('produce')
+        pipeline_description.add_step(step_0)
+
+        # Step 1: column_parser
+        step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
+        step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+        step_1.add_output('produce')
+        pipeline_description.add_step(step_1)
+
+        # Step 2: extract_columns_by_semantic_types(attributes)
+        step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
+        step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+        step_2.add_output('produce')
+        step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                                  data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
+        pipeline_description.add_step(step_2)
+
+        # Step 3: extract_columns_by_semantic_types(targets)
+        step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
+        step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
+        step_3.add_output('produce')
+        step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
+                                  data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
+        pipeline_description.add_step(step_3)
+
+        attributes = 'steps.2.produce'
+        targets = 'steps.3.produce'
+
+        # Steps 4-6: one primitive from each searched module
+        tods_step_4 = PrimitiveStep(primitive=index.get_primitive(combination[0]))
+        tods_step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
+        tods_step_4.add_output('produce')
+        pipeline_description.add_step(tods_step_4)
+
+        tods_step_5 = PrimitiveStep(primitive=index.get_primitive(combination[1]))
+        tods_step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
+        tods_step_5.add_output('produce')
+        pipeline_description.add_step(tods_step_5)
+
+        tods_step_6 = PrimitiveStep(primitive=index.get_primitive(combination[2]))
+        tods_step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
+        tods_step_6.add_output('produce')
+        pipeline_description.add_step(tods_step_6)
+
+        #tods_step_7 = PrimitiveStep(primitive=index.get_primitive(combination[3]))
+        #tods_step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
+        #tods_step_7.add_output('produce')
+        #pipeline_description.add_step(tods_step_7)
+
+        # Finalize the pipeline
+        final_step = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
+        final_step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
+        final_step.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
+        final_step.add_output('produce')
+        pipeline_description.add_step(final_step)
+
+        pipeline_description.add_output(name='output predictions', data_reference='steps.7.produce')
+
+        # Give each candidate a fresh id and timestamp
+        pipeline_description.id = str(uuid.uuid4())
+        pipeline_description.created = Pipeline().created
+
+        pipelines.append(pipeline_description)
+    return pipelines
+
+def _generate_pipelines(primitive_python_paths, cpu_count=40):
+    """
+    Args:
+        primitive_python_paths: a dict mapping each searched module to a list
+            of primitive Python paths
+
+    Returns:
+        a list of pipeline descriptions
+    """
+    import itertools
+    import multiprocessing as mp
+
+    #components = ['data_processing', 'timeseries_processing', 'feature_analysis', 'detection_algorithm']
+    components = ['timeseries_processing', 'feature_analysis', 'detection_algorithm']
+    combinations = itertools.product(*(primitive_python_paths[k] for k in components))
+
+    return _generate_pipeline(combinations)
+
+    ## Allocate tasks
+    #combination_each_core_list = [[] for i in range(cpu_count)]
+    #for idx, combination in enumerate(combinations):
+    #    core = idx % cpu_count
+    #    combination_each_core_list[core].append(combination)
+
+    ## Obtain all the pipelines
+    #pool = mp.Pool(processes=cpu_count)
+    #results = [pool.apply_async(_generate_pipeline,
+    #                            args=(combinations,))
+    #           for combinations in combination_each_core_list]
+    #pipelines = []
+    #for p in results:
+    #    pipelines.extend(p.get())
+    #return pipelines
diff --git a/tods/searcher/tods/utils.py b/tods/searcher/tods/utils.py
new file mode 100644
index 0000000..f41bb03
--- /dev/null
+++ b/tods/searcher/tods/utils.py
@@ -0,0 +1,59 @@
+
+def generate_dataset_problem(df, target_index, metric):
+    """
+    A wrapper for generating a dataset and a problem description
+
+    Args:
+        df (pandas.DataFrame): dataset
+        target_index (int): the column index of the target
+        metric (str): 'F1' for computing F1 on label 1, 'F1_MACRO' for
+            macro-F1 on both 0 and 1
+
+    Returns:
+        dataset, problem
+    """
+    from axolotl.utils import data_problem
+    from d3m.metadata.problem import TaskKeyword, PerformanceMetric
+
+    if metric == 'F1':
+        performance_metrics = [{'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}}]
+    elif metric == 'F1_MACRO':
+        performance_metrics = [{'metric': PerformanceMetric.F1_MACRO, 'params': {}}]
+    else:
+        raise ValueError('The metric {} is not supported.'.format(metric))
+
+    dataset, problem_description = data_problem.generate_dataset_problem(df,
+                                                                         target_index=target_index,
+                                                                         task_keywords=[TaskKeyword.ANOMALY_DETECTION,],
+                                                                         performance_metrics=performance_metrics)
+
+    return dataset, problem_description
+
+def evaluate_pipeline(problem_description, dataset, pipeline):
+    from axolotl.utils import schemas as schemas_utils
+    from axolotl.backend.simple import SimpleRunner
+    data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA")
+    scoring_pipeline = schemas_utils.get_scoring_pipeline()
+    data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split']
+    metrics = problem_description['problem']['performance_metrics']
+
+    backend = SimpleRunner(random_seed=0)
+    pipeline_result = backend.evaluate_pipeline(problem_description=problem_description,
+                                                pipeline=pipeline,
+                                                input_data=[dataset],
+                                                metrics=metrics,
+                                                data_preparation_pipeline=data_preparation_pipeline,
+                                                scoring_pipeline=scoring_pipeline,
+                                                data_preparation_params=data_preparation_params)
+    # Surface any evaluation errors without aborting the caller
+    try:
+        for error in pipeline_result.error:
+            if error is not None:
+                raise error
+    except Exception:
+        import traceback
+        traceback.print_exc()
+
+    return pipeline_result
+
+
diff --git a/tods/searcher/utils.py b/tods/searcher/utils.py
new file mode 100644
index 0000000..e375354
--- /dev/null
+++ b/tods/searcher/utils.py
@@ -0,0 +1,51 @@
+
+def generate_dataset_problem(df, target_index, metric):
+    """
+    A wrapper for generating a dataset and a problem description
+
+    Args:
+        df (pandas.DataFrame): dataset
+        target_index (int): the column index of the target
+        metric (str): 'F1' for computing F1 on label 1, 'F1_MACRO' for
+            macro-F1 on both 0 and 1
+
+    Returns:
+        dataset, problem
+    """
+    from axolotl.utils import data_problem
+    from d3m.metadata.problem import TaskKeyword, PerformanceMetric
+
+    if metric == 'F1':
+        performance_metrics = [{'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}}]
+    elif metric == 'F1_MACRO':
+        performance_metrics = [{'metric': PerformanceMetric.F1_MACRO, 'params': {}}]
+    else:
+        raise ValueError('The metric {} is not supported.'.format(metric))
+
+    dataset, problem_description = data_problem.generate_dataset_problem(df,
+                                                                         target_index=target_index,
+                                                                         task_keywords=[TaskKeyword.ANOMALY_DETECTION,],
+                                                                         performance_metrics=performance_metrics)
+
+    return dataset, problem_description
+
+def evaluate_pipeline(problem_description, dataset, pipeline):
+    from axolotl.utils import schemas as schemas_utils
+    from axolotl.backend.simple import SimpleRunner
+    data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA")
+    scoring_pipeline = schemas_utils.get_scoring_pipeline()
+    data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split']
+    metrics = problem_description['problem']['performance_metrics']
+
+    backend = SimpleRunner(random_seed=0)
+    pipeline_result = backend.evaluate_pipeline(problem_description=problem_description,
+                                                pipeline=pipeline,
+                                                input_data=[dataset],
+                                                metrics=metrics,
+                                                data_preparation_pipeline=data_preparation_pipeline,
+                                                scoring_pipeline=scoring_pipeline,
+                                                data_preparation_params=data_preparation_params)
+    return pipeline_result
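-- 
Reviewer note: a minimal end-to-end sketch of how the renamed tods.searcher module is meant to be driven, based only on the APIs added in this patch plus axolotl. The CSV path, target_index, and time_limit are placeholder values, and the search_fit call follows axolotl's PipelineSearchBase as used in the TODS examples; treat both as assumptions rather than part of this change.

    # Sketch only: assumes d3m, axolotl, and the TODS primitives are installed.
    import pandas as pd

    from axolotl.backend.simple import SimpleRunner
    from tods.searcher.schemas import load_default_pipeline
    from tods.searcher.search import BruteForceSearch
    from tods.searcher.utils import generate_dataset_problem, evaluate_pipeline

    df = pd.read_csv('datasets/anomaly/yahoo_sub_5.csv')  # hypothetical dataset path
    dataset, problem_description = generate_dataset_problem(
        df, target_index=6, metric='F1_MACRO')  # target_index is a placeholder

    # Score the shipped default pipeline as a baseline.
    baseline_result = evaluate_pipeline(problem_description, dataset, load_default_pipeline())
    print(baseline_result.scores)

    # Brute-force over the enabled primitive combinations.
    backend = SimpleRunner(random_seed=0)
    search = BruteForceSearch(problem_description=problem_description, backend=backend)
    best_runtime, best_pipeline_result = search.search_fit(input_data=[dataset], time_limit=60)
    print(best_pipeline_result.pipeline.id, best_pipeline_result.scores)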
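Since _generate_pipelines materializes the full Cartesian product of the enabled primitives (one per searched module), it is worth sizing the search space before running it. The sketch below reuses the module's own primitive_python_paths table to count candidates; with the lists enabled in this patch that is 6 x 7 x 7 = 294 pipelines.

    # Sketch: count the candidates BruteForceSearch will enumerate.
    import itertools

    from tods.searcher.search.brute_force_search import primitive_python_paths

    components = ['timeseries_processing', 'feature_analysis', 'detection_algorithm']
    combinations = list(itertools.product(*(primitive_python_paths[k] for k in components)))
    print(len(combinations))  # 294 with this patch's enabled lists
    for combo in combinations[:3]:
        print(combo)  # each combo fills steps 4-6 of a candidate pipeline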