From 8eef3a0baa632af1f44933f100ab33480ae4af5d Mon Sep 17 00:00:00 2001
From: lhenry15
Date: Wed, 3 Feb 2021 21:55:17 -0600
Subject: [PATCH] resolve construct prediction bug
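The example pipeline scripts now resolve the prediction-construction step (and the
other data-processing steps) against the TODS primitive namespace instead of the d3m
common primitives. A minimal sketch of the corrected step, with the primitive path and
arguments copied from the hunks below (assumes the d3m and tods packages are importable;
step indices follow build_AutoEncoder_pipeline.py):

    from d3m import index
    from d3m.metadata.base import ArgumentType
    from d3m.metadata.pipeline import PrimitiveStep

    # Step 6: construct predictions with the TODS primitive
    # (previously d3m.primitives.data_transformation.construct_predictions.Common)
    step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
    step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
    step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    step_6.add_output('produce')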
---
 .../script/build_AutoEncoder_pipeline.py           |  10 +-
 .../script/build_Autocorrelation_pipeline.py       |  70 -------
 .../script/build_DeepLog_pipeline.py               |  70 -------
 .../example_pipelines/script/build_Ensemble.py     |  72 -------
 .../script/build_IsolationForest_pipline.py        | 103 ----------
.../example_pipelines/script/build_LODA_pipline.py | 72 ------- .../script/build_MatrixProfile_pipeline.py | 70 ------- .../example_pipelines/script/build_SOD_pipeline.py | 70 ------- .../script/build_System_Wise_Detection_pipeline.py | 74 ------- examples/axolotl_interface/run_pipeline.py | 7 +- tods/tests/common/test_fixed_split.py | 148 ++++++++++++++ tods/tests/common/test_kfold_split.py | 100 +++++++++ tods/tests/common/test_kfold_timeseries_split.py | 223 +++++++++++++++++++++ tods/tests/common/test_no_split.py | 71 +++++++ tods/tests/common/test_redact_columns.py | 173 ++++++++++++++++ tods/tests/common/test_train_score_split.py | 130 ++++++++++++ 16 files changed, 855 insertions(+), 608 deletions(-) delete mode 100644 examples/axolotl_interface/example_pipelines/script/build_Autocorrelation_pipeline.py delete mode 100644 examples/axolotl_interface/example_pipelines/script/build_DeepLog_pipeline.py delete mode 100644 examples/axolotl_interface/example_pipelines/script/build_Ensemble.py delete mode 100644 examples/axolotl_interface/example_pipelines/script/build_IsolationForest_pipline.py delete mode 100644 examples/axolotl_interface/example_pipelines/script/build_LODA_pipline.py delete mode 100644 examples/axolotl_interface/example_pipelines/script/build_MatrixProfile_pipeline.py delete mode 100644 examples/axolotl_interface/example_pipelines/script/build_SOD_pipeline.py delete mode 100644 examples/axolotl_interface/example_pipelines/script/build_System_Wise_Detection_pipeline.py create mode 100644 tods/tests/common/test_fixed_split.py create mode 100644 tods/tests/common/test_kfold_split.py create mode 100644 tods/tests/common/test_kfold_timeseries_split.py create mode 100644 tods/tests/common/test_no_split.py create mode 100644 tods/tests/common/test_redact_columns.py create mode 100644 tods/tests/common/test_train_score_split.py diff --git a/examples/axolotl_interface/example_pipelines/script/build_AutoEncoder_pipeline.py b/examples/axolotl_interface/example_pipelines/script/build_AutoEncoder_pipeline.py index f6af364..79cc54f 100644 --- a/examples/axolotl_interface/example_pipelines/script/build_AutoEncoder_pipeline.py +++ b/examples/axolotl_interface/example_pipelines/script/build_AutoEncoder_pipeline.py @@ -10,19 +10,19 @@ pipeline_description = Pipeline() pipeline_description.add_input(name='inputs') # Step 0: dataset_to_dataframe -step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') step_0.add_output('produce') pipeline_description.add_step(step_0) # Step 1: column_parser -step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')) +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') step_1.add_output('produce') pipeline_description.add_step(step_1) # Step 2: extract_columns_by_semantic_types(attributes) -step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) 
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') step_2.add_output('produce') step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, @@ -30,7 +30,7 @@ step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALU pipeline_description.add_step(step_2) # Step 3: extract_columns_by_semantic_types(targets) -step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') step_3.add_output('produce') step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, @@ -53,7 +53,7 @@ step_5.add_output('produce') pipeline_description.add_step(step_5) # Step 6: Predictions -step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common')) +step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions')) step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') step_6.add_output('produce') diff --git a/examples/axolotl_interface/example_pipelines/script/build_Autocorrelation_pipeline.py b/examples/axolotl_interface/example_pipelines/script/build_Autocorrelation_pipeline.py deleted file mode 100644 index 4242e73..0000000 --- a/examples/axolotl_interface/example_pipelines/script/build_Autocorrelation_pipeline.py +++ /dev/null @@ -1,70 +0,0 @@ -from d3m import index -from d3m.metadata.base import ArgumentType -from d3m.metadata.pipeline import Pipeline, PrimitiveStep - -# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest -# extract_columns_by_semantic_types(targets) -> ^ - -# Creating pipeline -pipeline_description = Pipeline() -pipeline_description.add_input(name='inputs') - -# Step 0: dataset_to_dataframe -step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) -step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') -step_0.add_output('produce') -pipeline_description.add_step(step_0) - -# Step 1: column_parser -step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')) -step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_1.add_output('produce') -pipeline_description.add_step(step_1) - -# Step 2: extract_columns_by_semantic_types(attributes) -step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_2.add_output('produce') -step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/Attribute']) -pipeline_description.add_step(step_2) - -# Step 3: extract_columns_by_semantic_types(targets) -step_3 = 
PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_3.add_output('produce') -step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) -pipeline_description.add_step(step_3) - -attributes = 'steps.2.produce' -targets = 'steps.3.produce' - -# Step 4: processing -step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) -step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) -step_4.add_output('produce') -pipeline_description.add_step(step_4) - -# Step 5: algorithm` -step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.auto_correlation')) -step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') -step_5.add_output('produce') -pipeline_description.add_step(step_5) - -# Step 6: Predictions -step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common')) -step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') -step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_6.add_output('produce') -pipeline_description.add_step(step_6) - -# Final Output -pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce') - -# Output to json -data = pipeline_description.to_json() -with open('example_pipeline.json', 'w') as f: - f.write(data) - print(data) - diff --git a/examples/axolotl_interface/example_pipelines/script/build_DeepLog_pipeline.py b/examples/axolotl_interface/example_pipelines/script/build_DeepLog_pipeline.py deleted file mode 100644 index 21fd586..0000000 --- a/examples/axolotl_interface/example_pipelines/script/build_DeepLog_pipeline.py +++ /dev/null @@ -1,70 +0,0 @@ -from d3m import index -from d3m.metadata.base import ArgumentType -from d3m.metadata.pipeline import Pipeline, PrimitiveStep - -# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest -# extract_columns_by_semantic_types(targets) -> ^ - -# Creating pipeline -pipeline_description = Pipeline() -pipeline_description.add_input(name='inputs') - -# Step 0: dataset_to_dataframe -step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) -step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') -step_0.add_output('produce') -pipeline_description.add_step(step_0) - -# Step 1: column_parser -step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')) -step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_1.add_output('produce') -pipeline_description.add_step(step_1) - -# Step 2: extract_columns_by_semantic_types(attributes) -step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_2.add_output('produce') 
-step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/Attribute']) -pipeline_description.add_step(step_2) - -# Step 3: extract_columns_by_semantic_types(targets) -step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_3.add_output('produce') -step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) -pipeline_description.add_step(step_3) - -attributes = 'steps.2.produce' -targets = 'steps.3.produce' - -# Step 4: processing -step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) -step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) -step_4.add_output('produce') -pipeline_description.add_step(step_4) - -# Step 5: algorithm` -step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.deeplog')) -step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') -step_5.add_output('produce') -pipeline_description.add_step(step_5) - -# Step 6: Predictions -step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common')) -step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') -step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_6.add_output('produce') -pipeline_description.add_step(step_6) - -# Final Output -pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce') - -# Output to json -data = pipeline_description.to_json() -with open('example_pipeline.json', 'w') as f: - f.write(data) - print(data) - diff --git a/examples/axolotl_interface/example_pipelines/script/build_Ensemble.py b/examples/axolotl_interface/example_pipelines/script/build_Ensemble.py deleted file mode 100644 index 8534676..0000000 --- a/examples/axolotl_interface/example_pipelines/script/build_Ensemble.py +++ /dev/null @@ -1,72 +0,0 @@ -from d3m import index -from d3m.metadata.base import ArgumentType -from d3m.metadata.pipeline import Pipeline, PrimitiveStep - -# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest -# extract_columns_by_semantic_types(targets) -> ^ - -# Creating pipeline -pipeline_description = Pipeline() -pipeline_description.add_input(name='inputs') - -# Step 0: dataset_to_dataframe -step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) -step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') -step_0.add_output('produce') -pipeline_description.add_step(step_0) - -# Step 1: column_parser -step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')) -step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_1.add_output('produce') -pipeline_description.add_step(step_1) - -# Step 2: extract_columns_by_semantic_types(attributes) -step_2 = 
PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_2.add_output('produce') -step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/Attribute']) -pipeline_description.add_step(step_2) - -# Step 3: extract_columns_by_semantic_types(targets) -step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_3.add_output('produce') -step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) -pipeline_description.add_step(step_3) - -attributes = 'steps.2.produce' -targets = 'steps.3.produce' - -# Step 4: auto encoder -step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae')) -step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) -step_4.add_output('produce_score') -step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=[0,1,2]) -step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) -step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') -pipeline_description.add_step(step_4) - -# Step 5: ensemble -step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.Ensemble')) -step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce_score') -step_5.add_output('produce') -pipeline_description.add_step(step_5) - - -# Final Output -pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') - -# Output to YAML -#yaml = pipeline_description.to_yaml() -#with open('pipeline.yml', 'w') as f: -# f.write(yaml) -#prin(yaml) - -# Output to json -data = pipeline_description.to_json() -with open('example_pipeline.json', 'w') as f: - f.write(data) - print(data) diff --git a/examples/axolotl_interface/example_pipelines/script/build_IsolationForest_pipline.py b/examples/axolotl_interface/example_pipelines/script/build_IsolationForest_pipline.py deleted file mode 100644 index 8ff1d38..0000000 --- a/examples/axolotl_interface/example_pipelines/script/build_IsolationForest_pipline.py +++ /dev/null @@ -1,103 +0,0 @@ -from d3m import index -from d3m.metadata.base import ArgumentType -from d3m.metadata.pipeline import Pipeline, PrimitiveStep -from d3m.metadata import hyperparams - -# Creating pipeline -pipeline_description = Pipeline() -pipeline_description.add_input(name='inputs') - -# Step 0: dataset_to_dataframe -primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') -step_0 = PrimitiveStep(primitive=primitive_0) -step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') -step_0.add_output('produce') -pipeline_description.add_step(step_0) - -# # Step 1: column_parser -primitive_1 = index.get_primitive('d3m.primitives.data_transformation.column_parser.Common') -step_1 = PrimitiveStep(primitive=primitive_1) -step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, 
data_reference='steps.0.produce') -step_1.add_output('produce') -pipeline_description.add_step(step_1) - -# Step 2: extract_columns_by_semantic_types(attributes) -step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_2.add_output('produce') -step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) -pipeline_description.add_step(step_2) - -# Step 3: extract_columns_by_semantic_types(targets) -step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_3.add_output('produce') -step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) -pipeline_description.add_step(step_3) - -attributes = 'steps.2.produce' -targets = 'steps.3.produce' - -# Step 4: Power transformation -primitive_4 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.power_transformer') -step_4 = PrimitiveStep(primitive=primitive_4) -step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') -step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) -step_4.add_output('produce') -pipeline_description.add_step(step_4) - -# Step 5: Axiswise scaling -primitive_5 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler') -step_5 = PrimitiveStep(primitive=primitive_5) -step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') -step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') -step_5.add_output('produce') -pipeline_description.add_step(step_5) - -# Step 6: Standarization -primitive_6 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') -step_6 = PrimitiveStep(primitive=primitive_6) -step_6.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') -step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') -step_6.add_output('produce') -pipeline_description.add_step(step_6) - -# Step 7: Quantile transformation -primitive_7 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer') -step_7 = PrimitiveStep(primitive=primitive_7) -step_7.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') -step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce') -step_7.add_output('produce') -pipeline_description.add_step(step_7) - -# Step 4: Isolation Forest -primitive_8 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest') -step_8 = PrimitiveStep(primitive=primitive_8) -step_8.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) -step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce') -# step_8.add_output('produce_score') -step_8.add_output('produce') -pipeline_description.add_step(step_8) - -# Step 5: 
Predictions -step_9 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common')) -step_9.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.8.produce') -step_9.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_9.add_output('produce') -pipeline_description.add_step(step_9) - -# Final Output -pipeline_description.add_output(name='output predictions', data_reference='steps.9.produce') - -# Output to json -data = pipeline_description.to_json() -with open('example_pipeline.json', 'w') as f: - f.write(data) - print(data) - -## Output to YAML -#yaml = pipeline_description.to_yaml() -#with open('pipeline.yml', 'w') as f: -# f.write(yaml) -#print(yaml) diff --git a/examples/axolotl_interface/example_pipelines/script/build_LODA_pipline.py b/examples/axolotl_interface/example_pipelines/script/build_LODA_pipline.py deleted file mode 100644 index 039298f..0000000 --- a/examples/axolotl_interface/example_pipelines/script/build_LODA_pipline.py +++ /dev/null @@ -1,72 +0,0 @@ -from d3m import index -from d3m.metadata.base import ArgumentType -from d3m.metadata.pipeline import Pipeline, PrimitiveStep -from d3m.metadata import hyperparams -import copy - -# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest -# extract_columns_by_semantic_types(targets) -> ^ - -# Creating pipeline -pipeline_description = Pipeline() -pipeline_description.add_input(name='inputs') - -# Step 0: dataset_to_dataframe -primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') -step_0 = PrimitiveStep(primitive=primitive_0) -step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') -step_0.add_output('produce') -pipeline_description.add_step(step_0) - -# # Step 1: column_parser -primitive_1 = index.get_primitive('d3m.primitives.data_transformation.column_parser.Common') -step_1 = PrimitiveStep(primitive=primitive_1) -step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_1.add_output('produce') -pipeline_description.add_step(step_1) - - -# Step 2: extract_columns_by_semantic_types(attributes) -step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_2.add_output('produce') -step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) -pipeline_description.add_step(step_2) - - -# Step 3: extract_columns_by_semantic_types(targets) -step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_3.add_output('produce') -step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) -pipeline_description.add_step(step_3) - -attributes = 'steps.2.produce' -targets = 'steps.3.produce' - -# Step 4: test primitive -primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda') -step_4 = PrimitiveStep(primitive=primitive_4) 
-step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) -step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') -step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) -step_4.add_output('produce') -pipeline_description.add_step(step_4) - -# Step 5: Predictions -step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common')) -step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') -step_5.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_5.add_output('produce') -pipeline_description.add_step(step_5) - -# Final Output -pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') - -# Output to json -data = pipeline_description.to_json() -with open('example_pipeline.json', 'w') as f: - f.write(data) - print(data) - diff --git a/examples/axolotl_interface/example_pipelines/script/build_MatrixProfile_pipeline.py b/examples/axolotl_interface/example_pipelines/script/build_MatrixProfile_pipeline.py deleted file mode 100644 index 3d1e66c..0000000 --- a/examples/axolotl_interface/example_pipelines/script/build_MatrixProfile_pipeline.py +++ /dev/null @@ -1,70 +0,0 @@ -from d3m import index -from d3m.metadata.base import ArgumentType -from d3m.metadata.pipeline import Pipeline, PrimitiveStep - -# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest -# extract_columns_by_semantic_types(targets) -> ^ - -# Creating pipeline -pipeline_description = Pipeline() -pipeline_description.add_input(name='inputs') - -# Step 0: dataset_to_dataframe -step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) -step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') -step_0.add_output('produce') -pipeline_description.add_step(step_0) - -# Step 1: column_parser -step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')) -step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_1.add_output('produce') -pipeline_description.add_step(step_1) - -# Step 2: extract_columns_by_semantic_types(attributes) -step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_2.add_output('produce') -step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/Attribute']) -pipeline_description.add_step(step_2) - -# Step 3: extract_columns_by_semantic_types(targets) -step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_3.add_output('produce') -step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) -pipeline_description.add_step(step_3) - -attributes = 'steps.2.produce' -targets = 'steps.3.produce' - -# 
Step 4: processing -step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) -step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) -step_4.add_output('produce') -pipeline_description.add_step(step_4) - -# Step 5: algorithm` -step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.matrix_profile')) -step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') -step_5.add_output('produce') -pipeline_description.add_step(step_5) - -# Step 6: Predictions -step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common')) -step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') -step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_6.add_output('produce') -pipeline_description.add_step(step_6) - -# Final Output -pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce') - -# Output to json -data = pipeline_description.to_json() -with open('example_pipeline.json', 'w') as f: - f.write(data) - print(data) - diff --git a/examples/axolotl_interface/example_pipelines/script/build_SOD_pipeline.py b/examples/axolotl_interface/example_pipelines/script/build_SOD_pipeline.py deleted file mode 100644 index 9e92d0b..0000000 --- a/examples/axolotl_interface/example_pipelines/script/build_SOD_pipeline.py +++ /dev/null @@ -1,70 +0,0 @@ -from d3m import index -from d3m.metadata.base import ArgumentType -from d3m.metadata.pipeline import Pipeline, PrimitiveStep - -# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest -# extract_columns_by_semantic_types(targets) -> ^ - -# Creating pipeline -pipeline_description = Pipeline() -pipeline_description.add_input(name='inputs') - -# Step 0: dataset_to_dataframe -step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) -step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') -step_0.add_output('produce') -pipeline_description.add_step(step_0) - -# Step 1: column_parser -step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')) -step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_1.add_output('produce') -pipeline_description.add_step(step_1) - -# Step 2: extract_columns_by_semantic_types(attributes) -step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_2.add_output('produce') -step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/Attribute']) -pipeline_description.add_step(step_2) - -# Step 3: extract_columns_by_semantic_types(targets) -step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_3.add_output('produce') 
-step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) -pipeline_description.add_step(step_3) - -attributes = 'steps.2.produce' -targets = 'steps.3.produce' - -# Step 4: processing -step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler')) -step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) -step_4.add_output('produce') -pipeline_description.add_step(step_4) - -# Step 5: algorithm` -step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sod')) -step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') -step_5.add_output('produce') -pipeline_description.add_step(step_5) - -# Step 6: Predictions -step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common')) -step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') -step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_6.add_output('produce') -pipeline_description.add_step(step_6) - -# Final Output -pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce') - -# Output to json -data = pipeline_description.to_json() -with open('example_pipeline.json', 'w') as f: - f.write(data) - print(data) - diff --git a/examples/axolotl_interface/example_pipelines/script/build_System_Wise_Detection_pipeline.py b/examples/axolotl_interface/example_pipelines/script/build_System_Wise_Detection_pipeline.py deleted file mode 100644 index aa5ea69..0000000 --- a/examples/axolotl_interface/example_pipelines/script/build_System_Wise_Detection_pipeline.py +++ /dev/null @@ -1,74 +0,0 @@ -from d3m import index -from d3m.metadata.base import ArgumentType -from d3m.metadata.pipeline import Pipeline, PrimitiveStep - -# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest -# extract_columns_by_semantic_types(targets) -> ^ - -# Creating pipeline -pipeline_description = Pipeline() -pipeline_description.add_input(name='inputs') - -# Step 0: dataset_to_dataframe -step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) -step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') -step_0.add_output('produce') -pipeline_description.add_step(step_0) - -# Step 1: column_parser -step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')) -step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_1.add_output('produce') -pipeline_description.add_step(step_1) - -# Step 2: extract_columns_by_semantic_types(attributes) -step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') -step_2.add_output('produce') -step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/Attribute']) -pipeline_description.add_step(step_2) - -# Step 3: 
extract_columns_by_semantic_types(targets) -step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')) -step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') -step_3.add_output('produce') -step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, - data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) -pipeline_description.add_step(step_3) - -attributes = 'steps.2.produce' -targets = 'steps.3.produce' - -# Step 4: auto encoder -step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae')) -step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) -step_4.add_output('produce_score') -#step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=[2]) -#step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) -step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') -pipeline_description.add_step(step_4) - -# Step 5: ensemble -step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.system_wise_detection')) -step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce_score') -step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') - -step_5.add_output('produce') -pipeline_description.add_step(step_5) - - -# Final Output -pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') - -# Output to YAML -#yaml = pipeline_description.to_yaml() -#with open('pipeline.yml', 'w') as f: -# f.write(yaml) -#prin(yaml) - -# Output to json -data = pipeline_description.to_json() -with open('example_pipeline.json', 'w') as f: - f.write(data) - print(data) diff --git a/examples/axolotl_interface/run_pipeline.py b/examples/axolotl_interface/run_pipeline.py index 8444dff..aea27e0 100644 --- a/examples/axolotl_interface/run_pipeline.py +++ b/examples/axolotl_interface/run_pipeline.py @@ -9,13 +9,13 @@ this_path = os.path.dirname(os.path.abspath(__file__)) #table_path = 'datasets/NAB/realTweets/labeled_Twitter_volume_IBM.csv' # The path of the dataset parser = argparse.ArgumentParser(description='Arguments for running predefined pipelin.') -parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../datasets/yahoo_sub_5.csv'), +parser.add_argument('--table_path', type=str, default=os.path.join(this_path, '../../datasets/yahoo_sub_5.csv'), help='Input the path of the input data table') parser.add_argument('--target_index', type=int, default=6, help='Index of the ground truth (for evaluation)') parser.add_argument('--metric',type=str, default='F1_MACRO', help='Evaluation Metric (F1, F1_MACRO)') -parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipeline.json'), +parser.add_argument('--pipeline_path', default=os.path.join(this_path, './example_pipelines/abod_pipeline.json'), help='Input the path of the pre-built pipeline description') # parser.add_argument('--pipeline_path', default=os.path.join(this_path, '../tods/resources/default_pipeline.json'), # help='Input the path of the pre-built pipeline description') @@ -33,6 +33,9 @@ dataset = generate_dataset(df, target_index) # Load the default pipeline pipeline = load_pipeline(pipeline_path) +print(dir(pipeline)) 
+print(pipeline.steps) +print(dir(dataset)) # Run the pipeline pipeline_result = evaluate_pipeline(dataset, pipeline, metric) diff --git a/tods/tests/common/test_fixed_split.py b/tods/tests/common/test_fixed_split.py new file mode 100644 index 0000000..08f773a --- /dev/null +++ b/tods/tests/common/test_fixed_split.py @@ -0,0 +1,148 @@ +import os +import pickle +import unittest + +from d3m import container +from d3m.metadata import base as metadata_base + +from tods.common import FixedSplit + + +class FixedSplitDatasetSplitPrimitiveTestCase(unittest.TestCase): + def test_produce_train_values(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = FixedSplit.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams() + + hyperparams = hyperparams_class.defaults().replace({ + 'primary_index_values': ['9', '11', '13'], + }) + + # We want to make sure "primary_index_values" is encoded just as a list and not + # a pickle because runtime populates this primitive as a list from a split file. + self.assertEqual(hyperparams.values_to_json_structure(), {'primary_index_values': ['9', '11', '13'], 'row_indices': [], 'delete_recursive': False}) + + primitive = FixedSplit.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + # To test that pickling works. + pickle.dumps(primitive) + + results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value + + self.assertEqual(len(results), 1) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(results[0]['learningData'].shape[0], 147) + self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i not in [9, 11, 13]]) + + def test_produce_score_values(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. 
+ dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = fixed_split.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams() + + hyperparams = hyperparams_class.defaults().replace({ + 'primary_index_values': ['9', '11', '13'], + }) + + # We want to make sure "primary_index_values" is encoded just as a list and not + # a pickle because runtime populates this primitive as a list from a split file. + self.assertEqual(hyperparams.values_to_json_structure(), {'primary_index_values': ['9', '11', '13'], 'row_indices': [], 'delete_recursive': False}) + + primitive = fixed_split.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value + + self.assertEqual(len(results), 1) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(results[0]['learningData'].shape[0], 3) + self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i in [9, 11, 13]]) + + def test_produce_train_indices(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = fixed_split.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams() + + primitive = fixed_split.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'row_indices': [9, 11, 13], + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + # To test that pickling works. + pickle.dumps(primitive) + + results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value + + self.assertEqual(len(results), 1) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(results[0]['learningData'].shape[0], 147) + self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i not in [9, 11, 13]]) + + def test_produce_score_indices(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. 
+ dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = fixed_split.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams() + + primitive = fixed_split.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'row_indices': [9, 11, 13], + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value + + self.assertEqual(len(results), 1) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(results[0]['learningData'].shape[0], 3) + self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i in [9, 11, 13]]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tods/tests/common/test_kfold_split.py b/tods/tests/common/test_kfold_split.py new file mode 100644 index 0000000..9983a6e --- /dev/null +++ b/tods/tests/common/test_kfold_split.py @@ -0,0 +1,100 @@ +import os +import pickle +import unittest + +from d3m import container +from d3m.metadata import base as metadata_base + +from common_primitives import kfold_split + + +class KFoldDatasetSplitPrimitiveTestCase(unittest.TestCase): + def test_produce_train(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = kfold_split.KFoldDatasetSplitPrimitive.metadata.get_hyperparams() + + primitive = kfold_split.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'number_of_folds': 10, + 'shuffle': True, + 'delete_recursive': True, + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + # To test that pickling works. 
+ pickle.dumps(primitive) + + results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value + + self.assertEqual(len(results), 2) + + for dataset in results: + self.assertEqual(len(dataset), 4) + + self.assertEqual(results[0]['codes'].shape[0], 3) + self.assertEqual(results[1]['codes'].shape[0], 3) + + self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 40) + self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'}) + + self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 40) + self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'}) + + def test_produce_score(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = kfold_split.KFoldDatasetSplitPrimitive.metadata.get_hyperparams() + + primitive = kfold_split.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'number_of_folds': 10, + 'shuffle': True, + 'delete_recursive': True, + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value + + self.assertEqual(len(results), 2) + + for dataset in results: + self.assertEqual(len(dataset), 4) + + self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 0]), {'5', '11', '28', '31', '38'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ddd', 'eee'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000'}) + + self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'BBB', 'CCC'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 0]), {'12', '26', '29', '32', '39'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'BBB', 'CCC'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'bbb', 'ccc', 'ddd', 'eee'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'}) + + +if __name__ == '__main__': + unittest.main() diff --git a/tods/tests/common/test_kfold_timeseries_split.py 
b/tods/tests/common/test_kfold_timeseries_split.py new file mode 100644 index 0000000..885ab2e --- /dev/null +++ b/tods/tests/common/test_kfold_timeseries_split.py @@ -0,0 +1,223 @@ +import os +import pickle +import unittest + +from d3m import container +from d3m.metadata import base as metadata_base + +from common_primitives import kfold_split_timeseries + + +class KFoldTimeSeriesSplitPrimitiveTestCase(unittest.TestCase): + def test_produce_train_timeseries_1(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() + + folds = 5 + primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'number_of_folds': folds, + 'number_of_window_folds': 1, + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + # To test that pickling works. + pickle.dumps(primitive) + + results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value + + self.assertEqual(len(results), 2) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 8) + self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-05', '2013-11-06', '2013-11-07', '2013-11-08', '2013-11-11', + '2013-11-12', '2013-11-13', '2013-11-14'}) + + self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 8) + self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-13', '2013-11-14', '2013-11-15', '2013-11-18', '2013-11-19', + '2013-11-20', '2013-11-21', '2013-11-22'}) + + def test_produce_score_timeseries_1(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. 
+ dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() + + folds = 5 + primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'number_of_folds': folds, + 'number_of_window_folds': 1, + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value + + self.assertEqual(len(results), 2) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 6) + self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-15', '2013-11-18', '2013-11-19', + '2013-11-20', '2013-11-21', '2013-11-22'}) + + self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 6) + self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-25', '2013-11-26', '2013-11-27', + '2013-11-29', '2013-12-02', '2013-12-03'}) + + def test_produce_train(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + # We fake that the dataset is time-series. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Time') + + hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() + + folds = 5 + primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'number_of_folds': folds, + 'number_of_window_folds': 1, + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + # To test that pickling works. 
+ pickle.dumps(primitive) + + results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value + + self.assertEqual(len(results), 2) + + for dataset in results: + self.assertEqual(len(dataset), 4) + + self.assertEqual(results[0]['codes'].shape[0], 3) + self.assertEqual(results[1]['codes'].shape[0], 3) + + self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 9) + self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'bbb', 'ccc', 'ddd'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990'}) + + self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 9) + self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ddd', 'eee'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000'}) + + def test_produce_score(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + # We fake that the dataset is time-series. 
+ dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Time') + + hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() + + folds = 5 + primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'number_of_folds': folds, + 'number_of_window_folds': 1, + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value + + self.assertEqual(len(results), 2) + + for dataset in results: + self.assertEqual(len(dataset), 4) + + self.assertEqual(results[0]['codes'].shape[0], 3) + self.assertEqual(results[1]['codes'].shape[0], 3) + + self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 0]), {'2', '3', '32', '33', '37', '38', '39'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'ddd', 'eee'}) + self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000'}) + + self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 0]), {'22', '23', '24', '31', '40', '41', '42'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'ccc', 'ddd', 'eee'}) + self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2000'}) + + def test_unsorted_datetimes_timeseries_4(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_4', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() + + folds = 5 + primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'number_of_folds': folds, + 'number_of_window_folds': 1, + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + # To test that pickling works. 
+ pickle.dumps(primitive) + + results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value + + self.assertEqual(len(results), 2) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 8) + self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-05', '2013-11-06', '2013-11-07', '2013-11-08', '2013-11-11', + '2013-11-12', '2013-11-13', '2013-11-14'}) + + self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 8) + self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-13', '2013-11-14', '2013-11-15', '2013-11-18', '2013-11-19', + '2013-11-20', '2013-11-21', '2013-11-22'}) + + +if __name__ == '__main__': + unittest.main() diff --git a/tods/tests/common/test_no_split.py b/tods/tests/common/test_no_split.py new file mode 100644 index 0000000..f61f476 --- /dev/null +++ b/tods/tests/common/test_no_split.py @@ -0,0 +1,71 @@ +import os +import pickle +import unittest + +from d3m import container +from d3m.metadata import base as metadata_base + +from common_primitives import no_split + + +class NoSplitDatasetSplitPrimitiveTestCase(unittest.TestCase): + def test_produce_train(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams() + + primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults()) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + # To test that pickling works. + pickle.dumps(primitive) + + results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value + + self.assertEqual(len(results), 1) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(results[0]['learningData'].shape[0], 150) + self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150)]) + + def test_produce_score(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. 
+ dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams() + + primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults()) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value + + self.assertEqual(len(results), 1) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(results[0]['learningData'].shape[0], 150) + self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150)]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tods/tests/common/test_redact_columns.py b/tods/tests/common/test_redact_columns.py new file mode 100644 index 0000000..5bd5df0 --- /dev/null +++ b/tods/tests/common/test_redact_columns.py @@ -0,0 +1,173 @@ +import os +import unittest + +from d3m import container, utils +from d3m.metadata import base as metadata_base + +from common_primitives import redact_columns + + +class RedactColumnsPrimitiveTestCase(unittest.TestCase): + def _get_datasets(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + datasets = container.List([dataset], { + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': container.List, + 'dimension': { + 'length': 1, + }, + }, generate_metadata=False) + + # We update metadata based on metadata of each dataset. + # TODO: In the future this might be done automatically by generate_metadata. 
+ # See: https://gitlab.com/datadrivendiscovery/d3m/issues/119 + for index, dataset in enumerate(datasets): + datasets.metadata = dataset.metadata.copy_to(datasets.metadata, (), (index,)) + + return dataset_doc_path, datasets + + def test_basic(self): + dataset_doc_path, datasets = self._get_datasets() + + hyperparams_class = redact_columns.RedactColumnsPrimitive.metadata.get_hyperparams() + + primitive = redact_columns.RedactColumnsPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'semantic_types': ('https://metadata.datadrivendiscovery.org/types/TrueTarget',), + 'add_semantic_types': ('https://metadata.datadrivendiscovery.org/types/RedactedTarget', 'https://metadata.datadrivendiscovery.org/types/MissingData'), + })) + redacted_datasets = primitive.produce(inputs=datasets).value + + self.assertTrue(len(redacted_datasets), 1) + + redacted_dataset = redacted_datasets[0] + + self.assertIsInstance(redacted_dataset, container.Dataset) + self.assertEqual(redacted_dataset['learningData']['species'].values.tolist(), [''] * 150) + + self._test_metadata(redacted_datasets.metadata, dataset_doc_path, True) + self._test_metadata(redacted_dataset.metadata, dataset_doc_path, False) + + def _test_metadata(self, metadata, dataset_doc_path, is_list): + top_metadata = { + 'structural_type': 'd3m.container.dataset.Dataset', + 'id': 'iris_dataset_1', + 'version': '4.0.0', + 'name': 'Iris Dataset', + 'location_uris': [ + 'file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path), + ], + 'dimension': { + 'name': 'resources', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'], + 'length': 1, + }, + 'digest': '49404bf166238fbdac2b6d6baa899a0d1bf8ed5976525fa7353fd732ac218a85', + 'source': { + 'license': 'CC', + 'redacted': False, + 'human_subjects_research': False, + }, + } + + if is_list: + prefix = [0] + list_metadata = [{ + 'selector': [], + 'metadata': { + 'dimension': { + 'length': 1, + }, + 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, + 'structural_type': 'd3m.container.list.List', + }, + }] + else: + prefix = [] + list_metadata = [] + top_metadata['schema'] = metadata_base.CONTAINER_SCHEMA_VERSION + + self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), list_metadata + [{ + 'selector': prefix + [], + 'metadata': top_metadata, + }, { + 'selector': prefix + ['learningData'], + 'metadata': { + 'structural_type': 'd3m.container.pandas.DataFrame', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table', 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint'], + 'dimension': { + 'name': 'rows', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], + 'length': 150, + }, + }, + }, { + 'selector': prefix + ['learningData', '__ALL_ELEMENTS__'], + 'metadata': { + 'dimension': { + 'name': 'columns', + 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], + 'length': 6, + }, + }, + }, { + 'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 0], + 'metadata': { + 'name': 'd3mIndex', + 'structural_type': 'str', + 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], + }, + }, { + 'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 1], + 'metadata': { + 'name': 'sepalLength', + 'structural_type': 'str', + 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], + }, + }, { + 'selector': prefix + ['learningData', 
'__ALL_ELEMENTS__', 2], + 'metadata': { + 'name': 'sepalWidth', + 'structural_type': 'str', + 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], + }, + }, { + 'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 3], + 'metadata': { + 'name': 'petalLength', + 'structural_type': 'str', + 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], + }, + }, { + 'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 4], + 'metadata': { + 'name': 'petalWidth', + 'structural_type': 'str', + 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], + }, + }, { + 'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 5], + 'metadata': { + 'name': 'species', + 'structural_type': 'str', + 'semantic_types': [ + 'https://metadata.datadrivendiscovery.org/types/CategoricalData', + 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', + 'https://metadata.datadrivendiscovery.org/types/Target', + 'https://metadata.datadrivendiscovery.org/types/TrueTarget', + 'https://metadata.datadrivendiscovery.org/types/RedactedTarget', + 'https://metadata.datadrivendiscovery.org/types/MissingData', + ], + }, + }]) + + +if __name__ == '__main__': + unittest.main() diff --git a/tods/tests/common/test_train_score_split.py b/tods/tests/common/test_train_score_split.py new file mode 100644 index 0000000..b2f9a4e --- /dev/null +++ b/tods/tests/common/test_train_score_split.py @@ -0,0 +1,130 @@ +import os +import pickle +import unittest + +from d3m import container +from d3m.metadata import base as metadata_base + +from common_primitives import train_score_split + + +class TrainScoreDatasetSplitPrimitiveTestCase(unittest.TestCase): + def test_produce_train(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = train_score_split.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams() + + primitive = train_score_split.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'shuffle': True, + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + # To test that pickling works. 
+ pickle.dumps(primitive) + + results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value + + self.assertEqual(len(results), 1) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(results[0]['learningData'].shape[0], 112) + self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [ + '0', '1', '2', '3', '4', '5', '6', '9', '10', '11', '12', '13', '14', '15', '17', '19', '20', + '21', '23', '25', '28', '29', '30', '31', '32', '34', '35', '36', '38', '39', '41', '42', '43', + '46', '47', '48', '49', '50', '52', '53', '55', '56', '57', '58', '60', '61', '64', '65', '67', + '68', '69', '70', '72', '74', '75', '77', '79', '80', '81', '82', '85', '87', '88', '89', '91', + '92', '94', '95', '96', '98', '99', '101', '102', '103', '104', '105', '106', '108', '109', '110', + '111', '112', '113', '115', '116', '117', '118', '119', '120', '122', '123', '124', '125', '128', + '129', '130', '131', '133', '135', '136', '138', '139', '140', '141', '142', '143', '144', '145', + '146', '147', '148', '149', + ]) + + self.assertEqual(results.metadata.query((0, 'learningData'))['dimension']['length'], 112) + + column_names = ['d3mIndex', 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'species'] + for i in range(6): + self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['name'], column_names[i]) + + self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 0))['semantic_types'], ( + "http://schema.org/Integer", "https://metadata.datadrivendiscovery.org/types/PrimaryKey" + )) + for i in range(1, 5): + self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['semantic_types'], ( + 'http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute' + )) + self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 5))['semantic_types'],( + 'https://metadata.datadrivendiscovery.org/types/CategoricalData', + 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', + 'https://metadata.datadrivendiscovery.org/types/Target', + 'https://metadata.datadrivendiscovery.org/types/TrueTarget' + )) + + def test_produce_score(self): + dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) + + dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) + + # We set semantic types like runtime would. 
+ dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') + dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') + dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') + + hyperparams_class = train_score_split.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams() + + primitive = train_score_split.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ + 'shuffle': True, + })) + + primitive.set_training_data(dataset=dataset) + primitive.fit() + + results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value + + self.assertEqual(len(results), 1) + + for dataset in results: + self.assertEqual(len(dataset), 1) + + self.assertEqual(results[0]['learningData'].shape[0], 38) + self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [ + '7', '8', '16', '18', '22', '24', '26', '27', '33', '37', '40', '44', '45', '51', '54', + '59', '62', '63', '66', '71', '73', '76', '78', '83', '84', '86', '90', '93', '97', '100', + '107', '114', '121', '126', '127', '132', '134', '137', + ]) + + self.assertEqual(results.metadata.query((0, 'learningData'))['dimension']['length'], 38) + + column_names = ['d3mIndex', 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'species'] + for i in range(6): + self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['name'], + column_names[i]) + + self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 0))['semantic_types'], ( + "http://schema.org/Integer", "https://metadata.datadrivendiscovery.org/types/PrimaryKey" + )) + for i in range(1, 5): + self.assertEqual( + results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, i))['semantic_types'], ( + 'http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute' + )) + self.assertEqual(results.metadata.query((0, 'learningData', metadata_base.ALL_ELEMENTS, 5))['semantic_types'], ( + 'https://metadata.datadrivendiscovery.org/types/CategoricalData', + 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', + 'https://metadata.datadrivendiscovery.org/types/Target', + 'https://metadata.datadrivendiscovery.org/types/TrueTarget' + )) + + +if __name__ == '__main__': + unittest.main()
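
Note (not part of the patch): the new test files above all follow the same d3m split-primitive pattern — load a dataset, tag the target column the way the runtime would, build the primitive from its default hyper-parameters, then fit and produce. A minimal sketch of that pattern is below, assuming the d3m core package and common_primitives are installed; the datasetDoc.json path is a hypothetical placeholder, not one shipped with this patch.

import os

from d3m import container
from d3m.metadata import base as metadata_base

from common_primitives import no_split

# Hypothetical location of a D3M dataset description; the tests above resolve
# real paths relative to the test file instead.
dataset_doc_path = os.path.abspath('datasetDoc.json')
dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path))

# Mark the target column (index 5 in the iris-based tests) the way the runtime would.
dataset.metadata = dataset.metadata.add_semantic_type(
    ('learningData', metadata_base.ALL_ELEMENTS, 5),
    'https://metadata.datadrivendiscovery.org/types/TrueTarget')

# Configure the split primitive from its defaults, fit it on the full dataset,
# and produce the train view for split index 0.
hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams()
primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults())
primitive.set_training_data(dataset=dataset)
primitive.fit()
results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value
print(results[0]['learningData'].shape)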