From 61c030d8c3db03e5189f7a01daaf2ad7bf128b9a Mon Sep 17 00:00:00 2001
From: lhenry15
Date: Fri, 11 Sep 2020 11:49:09 -0500
Subject: [PATCH] remove tods.tods

---
 tods/tods/__init__.py                     |   0
 tods/tods/resources/default_pipeline.json |   1 -
 tods/tods/schemas.py                      |  10 -
 tods/tods/search/__init__.py              |   1 -
 tods/tods/search/brute_force_search.py    | 292 ------------------------------
 tods/tods/utils.py                        |  59 ------
 6 files changed, 363 deletions(-)
 delete mode 100644 tods/tods/__init__.py
 delete mode 100644 tods/tods/resources/default_pipeline.json
 delete mode 100644 tods/tods/schemas.py
 delete mode 100644 tods/tods/search/__init__.py
 delete mode 100644 tods/tods/search/brute_force_search.py
 delete mode 100644 tods/tods/utils.py

diff --git a/tods/tods/__init__.py b/tods/tods/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tods/tods/resources/default_pipeline.json b/tods/tods/resources/default_pipeline.json
deleted file mode 100644
index bff2c98..0000000
--- a/tods/tods/resources/default_pipeline.json
+++ /dev/null
@@ -1 +0,0 @@
-{"id": "384bbfab-4f6d-4001-9f90-684ea5681f5d", "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json", "created": "2020-09-09T23:40:01.756164Z", "inputs": [{"name": "inputs"}], "outputs": [{"data": "steps.7.produce", "name": "output predictions"}], "steps": [{"type": "PRIMITIVE", "primitive": {"id": "4b42ce1e-9b98-4a25-b68e-fad13311eb65", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.dataset_to_dataframe.Common", "name": "Extract a DataFrame from a Dataset", "digest": "b94ee59ccf8db678d506adddbc238fb2049fb664a1e3f3f3f6a6517c0c4f8e5f"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "inputs.0"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "d510cb7a-1782-4f51-b44c-58f0236e47c7", "version": "0.6.0", "python_path": "d3m.primitives.data_transformation.column_parser.Common", "name": "Parses strings into their types", "digest": "256f0155c7185d747b3b23096e46c40d15844106f9ed6346453f6010891f1896"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "85fe81066e85dbb62eacbe8a96be52d08e7aec22a025a29c81feaaaa72d7f7d0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/Attribute"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "4503a4c6-42f7-45a1-a1d4-ed69699cf5e1", "version": "0.4.0", "python_path": "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common", "name": "Extracts columns by semantic type", "digest": "85fe81066e85dbb62eacbe8a96be52d08e7aec22a025a29c81feaaaa72d7f7d0"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.0.produce"}}, "outputs": [{"id": "produce"}], "hyperparams": {"semantic_types": {"type": "VALUE", "data": ["https://metadata.datadrivendiscovery.org/types/TrueTarget"]}}}, {"type": "PRIMITIVE", "primitive": {"id": "642de2e7-5590-3cab-9266-2a53c326c461", "version": "0.0.1", "python_path": "d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler", "name": "Axis_wise_scale"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.2.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "eaff2f35-978c-4530-a12e-061a5f0beacd", "version": "0.1.0", "python_path": "d3m.primitives.tods.feature_analysis.statistical_mean", "name": "Time Series Decompostional", "digest": "2f2a8c07878643fe29c346096b91b5ba91477baa1e7e78684f07e53d29766ca4"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.4.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "67e7fcdf-d645-3417-9aa4-85cd369487d9", "version": "0.0.1", "python_path": "d3m.primitives.tods.detection_algorithm.pyod_vae", "name": "TODS.anomaly_detection_primitives.VariationalAutoEncoder"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.5.produce"}}, "outputs": [{"id": "produce"}]}, {"type": "PRIMITIVE", "primitive": {"id": "8d38b340-f83f-4877-baaa-162f8e551736", "version": "0.3.0", "python_path": "d3m.primitives.data_transformation.construct_predictions.Common", "name": "Construct pipeline predictions output", "digest": "d5384857f75090844f367504befb1a854e5088589f6aae0795f66ccf10403e19"}, "arguments": {"inputs": {"type": "CONTAINER", "data": "steps.6.produce"}, "reference": {"type": "CONTAINER", "data": "steps.1.produce"}}, "outputs": [{"id": "produce"}]}], "digest": "8d969800816d9596e94cb045aacce43dc3d49e8c5bedb403e35af6c9b8339990"}
diff --git a/tods/tods/schemas.py b/tods/tods/schemas.py
deleted file mode 100644
index 02d76ad..0000000
--- a/tods/tods/schemas.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import os
-
-resource_dir = os.path.dirname(__file__)
-
-DEFAULT_PIPELINE_DIR = os.path.join(resource_dir, 'resources', 'default_pipeline.json')
-
-def load_default_pipeline():
-    from axolotl.utils import pipeline as pipeline_utils
-    pipeline = pipeline_utils.load_pipeline(DEFAULT_PIPELINE_DIR)
-    return pipeline
diff --git a/tods/tods/search/__init__.py b/tods/tods/search/__init__.py
deleted file mode 100644
index 179c117..0000000
--- a/tods/tods/search/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .brute_force_search import BruteForceSearch
diff --git a/tods/tods/search/brute_force_search.py b/tods/tods/search/brute_force_search.py
deleted file mode 100644
index ac875e1..0000000
--- a/tods/tods/search/brute_force_search.py
+++ /dev/null
@@ -1,292 +0,0 @@
-# A Brute-Force Search
-import uuid
-import random
-
-from d3m.metadata.pipeline import Pipeline
-
-from axolotl.algorithms.base import PipelineSearchBase
-from axolotl.utils import schemas as schemas_utils
-
-class BruteForceSearch(PipelineSearchBase):
-    def __init__(self, problem_description, backend, *, primitives_blocklist=None, ranking_function=None):
-        super().__init__(problem_description=problem_description, backend=backend,
-                         primitives_blocklist=primitives_blocklist, ranking_function=ranking_function)
-        if self.ranking_function is None:
-            self.ranking_function = _rank_first_metric
-
-        # Find the candidates
-        self.task_description = schemas_utils.get_task_description(self.problem_description['problem']['task_keywords'])
-        self.available_pipelines = self._return_pipelines(
-            self.task_description['task_type'], self.task_description['task_subtype'], self.task_description['data_types'])
-
-        self.metrics = self.problem_description['problem']['performance_metrics']
-        self.data_preparation_pipeline = _generate_data_preparation_pipeline()
-        self.scoring_pipeline = _generate_scoring_pipeline()
-        self.data_preparation_params = _generate_data_preparation_params()
-
-        self.current_pipeline_index = 0
-        self.offset = 1
-
-    def evaluate(self, pipeline_to_eval, input_data=None):
-        if input_data is None:
-            input_data = self.input_data
-        pipeline_result = self.backend.evaluate_pipeline(
-            problem_description=self.problem_description,
-            pipeline=pipeline_to_eval,
-            input_data=input_data,
-            metrics=self.metrics,
-            data_preparation_pipeline=self.data_preparation_pipeline,
-            scoring_pipeline=self.scoring_pipeline,
-            data_preparation_params=self.data_preparation_params)
-
-        return pipeline_result
-
-    def _search(self, time_left):
-        # Read all the pipelines to be evaluated
-        pipelines_to_eval = self.available_pipelines[self.current_pipeline_index: self.current_pipeline_index+self.offset]
-        self.current_pipeline_index += 1
-
-        pipeline_results = self.backend.evaluate_pipelines(
-            problem_description=self.problem_description,
-            pipelines=pipelines_to_eval,
-            input_data=self.input_data,
-            metrics=self.metrics,
-            data_preparation_pipeline=self.data_preparation_pipeline,
-            scoring_pipeline=self.scoring_pipeline,
-            data_preparation_params=self.data_preparation_params)
-
-        # DEBUG
-        ####################
-        for pipeline_result in pipeline_results:
-            try:
-                for error in pipeline_result.error:
-                    if error is not None:
-                        raise error
-            except:
-                import traceback
-                traceback.print_exc()
-        ####################
-
-        return [self.ranking_function(pipeline_result) for pipeline_result in pipeline_results]
-
-    def _return_pipelines(self, task_type, task_subtype, data_type):
-        pipeline_candidates = _generate_pipelines(primitive_python_paths)
-        return pipeline_candidates
-
-primitive_python_paths = {
-    'data_processing': [
-        #'d3m.primitives.tods.data_processing.time_interval_transform',
-        #'d3m.primitives.tods.data_processing.categorical_to_binary',
-        'd3m.primitives.tods.data_processing.column_filter',
-        #'d3m.primitives.tods.data_processing.timestamp_validation',
-        #'d3m.primitives.tods.data_processing.duplication_validation',
-        #'d3m.primitives.tods.data_processing.continuity_validation',
-    ],
-    'timeseries_processing': [
-        'd3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler',
-        'd3m.primitives.tods.timeseries_processing.transformation.standard_scaler',
-        'd3m.primitives.tods.timeseries_processing.transformation.power_transformer',
-        'd3m.primitives.tods.timeseries_processing.transformation.quantile_transformer',
-        'd3m.primitives.tods.timeseries_processing.transformation.moving_average_transform',
-        'd3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing',
-        #'d3m.primitives.tods.timeseries_processing.transformation.holt_smoothing',
-        #'d3m.primitives.tods.timeseries_processing.transformation.holt_winters_exponential_smoothing',
-        #'d3m.primitives.tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition',
-    ],
-    'feature_analysis': [
-        #'d3m.primitives.tods.feature_analysis.auto_correlation',
-        'd3m.primitives.tods.feature_analysis.statistical_mean',
-        'd3m.primitives.tods.feature_analysis.statistical_median',
-        'd3m.primitives.tods.feature_analysis.statistical_g_mean',
-        'd3m.primitives.tods.feature_analysis.statistical_abs_energy',
-        'd3m.primitives.tods.feature_analysis.statistical_abs_sum',
-        'd3m.primitives.tods.feature_analysis.statistical_h_mean',
-        'd3m.primitives.tods.feature_analysis.statistical_maximum',
-        #'d3m.primitives.tods.feature_analysis.statistical_minimum',
-        #'d3m.primitives.tods.feature_analysis.statistical_mean_abs',
-        #'d3m.primitives.tods.feature_analysis.statistical_mean_abs_temporal_derivative',
-        #'d3m.primitives.tods.feature_analysis.statistical_mean_temporal_derivative',
-        #'d3m.primitives.tods.feature_analysis.statistical_median_abs_deviation',
-        #'d3m.primitives.tods.feature_analysis.statistical_kurtosis',
-        #'d3m.primitives.tods.feature_analysis.statistical_skew',
-        #'d3m.primitives.tods.feature_analysis.statistical_std',
-        #'d3m.primitives.tods.feature_analysis.statistical_var',
-        #'d3m.primitives.tods.feature_analysis.statistical_variation',
-        #'d3m.primitives.tods.feature_analysis.statistical_vec_sum',
-        #'d3m.primitives.tods.feature_analysis.statistical_willison_amplitude',
-        #'d3m.primitives.tods.feature_analysis.statistical_zero_crossing',
-        #'d3m.primitives.tods.feature_analysis.spectral_residual_transform',
-        #'d3m.primitives.tods.feature_analysis.fast_fourier_transform',
-        #'d3m.primitives.tods.feature_analysis.discrete_cosine_transform',
-        #'d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization',
-        #'d3m.primitives.tods.feature_analysis.bk_filter',
-        #'d3m.primitives.tods.feature_analysis.hp_filter',
-        #'d3m.primitives.tods.feature_analysis.truncated_svd',
-        #'d3m.primitives.tods.feature_analysis.wavelet_transform',
-        #'d3m.primitives.tods.feature_analysis.trmf',
-    ],
-    'detection_algorithm': [
-        'd3m.primitives.tods.detection_algorithm.pyod_ae',
-        'd3m.primitives.tods.detection_algorithm.pyod_vae',
-        'd3m.primitives.tods.detection_algorithm.pyod_cof',
-        'd3m.primitives.tods.detection_algorithm.pyod_sod',
-        'd3m.primitives.tods.detection_algorithm.pyod_abod',
-        'd3m.primitives.tods.detection_algorithm.pyod_hbos',
-        'd3m.primitives.tods.detection_algorithm.pyod_iforest',
-        #'d3m.primitives.tods.detection_algorithm.pyod_lof',
-        #'d3m.primitives.tods.detection_algorithm.pyod_knn',
-        #'d3m.primitives.tods.detection_algorithm.pyod_ocsvm',
-        #'d3m.primitives.tods.detection_algorithm.pyod_loda',
-        #'d3m.primitives.tods.detection_algorithm.pyod_cblof',
-        #'d3m.primitives.tods.detection_algorithm.pyod_sogaal',
-        #'d3m.primitives.tods.detection_algorithm.pyod_mogaal',
-        #'d3m.primitives.tods.detection_algorithm.matrix_profile',
-        #'d3m.primitives.tods.detection_algorithm.AutoRegODetector',
-        #'d3m.primitives.tods.detection_algorithm.LSTMODetector',
-        #'d3m.primitives.tods.detection_algorithm.AutoRegODetector',
-        #'d3m.primitives.tods.detection_algorithm.PCAODetector',
-        #'d3m.primitives.tods.detection_algorithm.KDiscordODetector',
-        #'d3m.primitives.tods.detection_algorithm.deeplog',
-        #'d3m.primitives.tods.detection_algorithm.telemanom',
-    ]
-}
-
-
-def _rank_first_metric(pipeline_result):
-    if pipeline_result.status == 'COMPLETED':
-        scores = pipeline_result.scores
-        pipeline_result.rank = -scores['value'][0]
-        return pipeline_result
-    else:
-        # error
-        pipeline_result.rank = 1
-        return pipeline_result
-
-def _generate_data_preparation_params():
-    from axolotl.utils import schemas as schemas_utils
-    data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split']
-    return data_preparation_params
-
-def _generate_scoring_pipeline():
-    from axolotl.utils import schemas as schemas_utils
-    scoring_pipeline = schemas_utils.get_scoring_pipeline()
-    return scoring_pipeline
-
-def _generate_data_preparation_pipeline():
-    from axolotl.utils import schemas as schemas_utils
-    data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA")
-    return data_preparation_pipeline
-
-def _generate_pipline(combinations):
-    from d3m import index
-    from d3m.metadata.base import ArgumentType
-    from d3m.metadata.pipeline import Pipeline, PrimitiveStep
-
-    piplines = []
-    for combination in combinations:
-        # Creating pipeline
-        pipeline_description = Pipeline()
-        pipeline_description.add_input(name='inputs')
-
-        # The first three steps are fixed
-        # Step 0: dataset_to_dataframe
-        step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
-        step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
-        step_0.add_output('produce')
-        pipeline_description.add_step(step_0)
-
-        # Step 1: column_parser
-        step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common'))
-        step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
-        step_1.add_output('produce')
-        pipeline_description.add_step(step_1)
-
-        # Step 2: extract_columns_by_semantic_types(attributes)
-        step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
-        step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
-        step_2.add_output('produce')
-        step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
-                                  data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
-        pipeline_description.add_step(step_2)
-
-        # Step 3: extract_columns_by_semantic_types(targets)
-        step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
-        step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
-        step_3.add_output('produce')
-        step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
-                                  data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
-        pipeline_description.add_step(step_3)
-
-        attributes = 'steps.2.produce'
-        targets = 'steps.3.produce'
-
-        tods_step_4 = PrimitiveStep(primitive=index.get_primitive(combination[0]))
-        tods_step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
-        tods_step_4.add_output('produce')
-        pipeline_description.add_step(tods_step_4)
-
-        tods_step_5 = PrimitiveStep(primitive=index.get_primitive(combination[1]))
-        tods_step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
-        tods_step_5.add_output('produce')
-        pipeline_description.add_step(tods_step_5)
-
-        tods_step_6= PrimitiveStep(primitive=index.get_primitive(combination[2]))
-        tods_step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
-        tods_step_6.add_output('produce')
-        pipeline_description.add_step(tods_step_6)
-
-        #tods_step_7 = PrimitiveStep(primitive=index.get_primitive(combination[3]))
-        #tods_step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
-        #tods_step_7.add_output('produce')
-        #pipeline_description.add_step(tods_step_7)
-
-        # Finalize the pipeline
-        final_step = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
-        final_step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
-        final_step.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
-        final_step.add_output('produce')
-        pipeline_description.add_step(final_step)
-
-        pipeline_description.add_output(name='output predictions', data_reference='steps.7.produce')
-
-        pipeline_description.id = str(uuid.uuid4())
-        pipeline_description.created = Pipeline().created
-
-        piplines.append(pipeline_description)
-    return piplines
-
-def _generate_pipelines(primitive_python_paths, cpu_count=40):
-    """
-    Args:
-        primitive_python_paths: a list of primitive Python paths for algorithms
-
-    Returns:
-        the pipline description json
-    """
-    import itertools
-    import multiprocessing as mp
-
-    #components = ['data_processing', 'timeseries_processing', 'feature_analysis', 'detection_algorithm']
-    components = ['timeseries_processing', 'feature_analysis', 'detection_algorithm']
-    combinations = itertools.product(*(primitive_python_paths[k] for k in components))
-
-
-    return _generate_pipline(combinations)
-    #pipelines = []
-
-    ## Allocate tasks
-    #combination_each_core_list = [[] for i in range(cpu_count)]
-    #for idx, combination in enumerate(combinations):
-    #    core = idx % cpu_count
-    #    combination_each_core_list[core].append(combination)
-
-    ## Obtain all the pipelines
-    #pool = mp.Pool(processes=cpu_count)
-    #results = [pool.apply_async(_generate_pipline,
-    #                            args=(combinations,))
-    #           for combinations in combination_each_core_list]
-    #piplines = []
-    #for p in results:
-    #    piplines.extend(p.get())
-
-    return piplines
diff --git a/tods/tods/utils.py b/tods/tods/utils.py
deleted file mode 100644
index f41bb03..0000000
--- a/tods/tods/utils.py
+++ /dev/null
@@ -1,59 +0,0 @@
-
-def generate_dataset_problem(df, target_index, metric):
-    """
-    A wrapper for generating dataset and problem
-
-    Args:
-        df (pandas.DataFrame): dataset
-        target_index (int): The column index of the target
-        metric (str): `F1` for computing F1 on label 1, 'F1_MACRO` for
-            macro-F1 on both 0 and 1
-
-    returns:
-        dataset, problem
-    """
-    from axolotl.utils import data_problem
-    from d3m.metadata.problem import TaskKeyword, PerformanceMetric
-
-    if metric == 'F1':
-        performance_metrics = [{'metric': PerformanceMetric.F1, 'params': {'pos_label': '1'}}]
-    elif metric == 'F1_MACRO':
-        performance_metrics = [{'metric': PerformanceMetric.F1_MACRO, 'params': {}}]
-    else:
-        raise ValueError('The metric {} not supported.'.format(metric))
-
-
-    dataset, problem_description = data_problem.generate_dataset_problem(df,
-                                                                         target_index=target_index,
-                                                                         task_keywords=[TaskKeyword.ANOMALY_DETECTION,],
-                                                                         performance_metrics=performance_metrics)
-
-    return dataset, problem_description
-
-def evaluate_pipeline(problem_description, dataset, pipeline):
-    from axolotl.utils import schemas as schemas_utils
-    from axolotl.backend.simple import SimpleRunner
-    data_preparation_pipeline = schemas_utils.get_splitting_pipeline("TRAINING_DATA")
-    scoring_pipeline = schemas_utils.get_scoring_pipeline()
-    data_preparation_params = schemas_utils.DATA_PREPARATION_PARAMS['no_split']
-    metrics = problem_description['problem']['performance_metrics']
-
-    backend = SimpleRunner(random_seed=0)
-    pipeline_result = backend.evaluate_pipeline(problem_description=problem_description,
-                                                pipeline=pipeline,
-                                                input_data=[dataset],
-                                                metrics=metrics,
-                                                data_preparation_pipeline=data_preparation_pipeline,
-                                                scoring_pipeline=scoring_pipeline,
-                                                data_preparation_params=data_preparation_params)
-    try:
-        for error in pipeline_result.error:
-            if error is not None:
-                raise error
-    except:
-        import traceback
-        traceback.print_exc()
-
-    return pipeline_result
--
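
For reference, the modules deleted above formed a small end-to-end API. The following is a minimal usage sketch reconstructed from the deleted sources, not code from the patch itself: the `tods.tods` import root follows the pre-removal layout named in the commit subject, and the DataFrame contents and `target_index` are illustrative only.

    # Sketch of the deleted tods.tods API (reconstructed; paths and data are assumptions).
    import pandas as pd

    from tods.tods.schemas import load_default_pipeline                      # was tods/tods/schemas.py
    from tods.tods.utils import generate_dataset_problem, evaluate_pipeline  # was tods/tods/utils.py

    # Illustrative data: one value column plus a binary anomaly label in column 1.
    df = pd.DataFrame({'value': [0.0, 0.1, 0.2, 9.9, 0.1],
                       'anomaly': ['0', '0', '0', '1', '0']})

    # Wrap the DataFrame into a D3M dataset/problem pair, scored by F1 on label '1'.
    dataset, problem = generate_dataset_problem(df, target_index=1, metric='F1')

    # Load the bundled default pipeline (resources/default_pipeline.json) and
    # evaluate it with axolotl's SimpleRunner via the deleted wrapper.
    pipeline = load_default_pipeline()
    result = evaluate_pipeline(problem, dataset, pipeline)
    print(result)

The deleted BruteForceSearch worked the same way underneath: it enumerated primitive combinations with itertools.product over the timeseries_processing, feature_analysis, and detection_algorithm pools, evaluated each candidate pipeline, and ranked completed runs by the negated first metric score.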