From 77d72c7b86e1dbdf1071d434b2154c8f03d10218 Mon Sep 17 00:00:00 2001
From: Devesh Kumar
Date: Mon, 9 Nov 2020 23:23:10 -0600
Subject: [PATCH] initial commit

Former-commit-id: 0aac720ed915c5217616f5e245f26aa9343a4d98
Former-commit-id: ff7eb7ad1865310dc4622b5e989481d8e310ef08
Former-commit-id: e5a920084c8dc03117d73b27bb4776b9b9b4fe4d
Former-commit-id: 46f2a522ddf879112539d13753985f079d416ad4
Former-commit-id: 90c1b4b06eb7da5638b2cb73219910b904d8596c
Former-commit-id: 7532188e1b60dd2bd4853544e9a998d1d3ff74d4
---
 new_tests/build_ABOD_pipline.py | 70 ++++
 new_tests/build_AutoEncoder.py | 67 ++++
 new_tests/build_AutoRegODetect_pipeline.py | 71 ++++
 new_tests/build_AxiswiseScale_pipline.py | 50 +++
 new_tests/build_BKFilter_pipline.py | 44 +++
 new_tests/build_CBLOF_pipline.py | 51 +++
 new_tests/build_CategoricalToBinary.py | 48 +++
new_tests/build_ColumnFilter_pipeline.py | 49 +++ new_tests/build_ContinuityValidation_pipline.py | 43 +++ new_tests/build_DeepLog_pipeline.py | 49 +++ new_tests/build_DiscreteCosineTransform.py | 50 +++ new_tests/build_DuplicationValidation_pipline.py | 42 +++ new_tests/build_FastFourierTransform.py | 48 +++ new_tests/build_HBOS_pipline.py | 68 ++++ new_tests/build_HBOS_score_pipline.py | 71 ++++ new_tests/build_HPFilter_pipline.py | 46 +++ new_tests/build_HoltSmoothing_pipline.py | 76 ++++ ...uild_HoltWintersExponentialSmoothing_pipline.py | 76 ++++ new_tests/build_IsolationForest_pipline.py | 59 +++ new_tests/build_KDiscord_pipeline.py | 71 ++++ new_tests/build_KNN_pipline.py | 51 +++ new_tests/build_LODA_pipline.py | 51 +++ new_tests/build_LOF_pipline.py | 51 +++ new_tests/build_LSTMOD_pipline.py | 70 ++++ new_tests/build_MatrixProfile_pipeline.py | 49 +++ new_tests/build_MeanAverageTransform_pipline.py | 77 ++++ new_tests/build_NonNegativeMatrixFactorization.py | 50 +++ new_tests/build_OCSVM_pipline.py | 51 +++ new_tests/build_PCAODetect_pipeline.py | 71 ++++ new_tests/build_PowerTransform_pipline.py | 49 +++ new_tests/build_PyodCOF.py | 51 +++ new_tests/build_QuantileTransform_pipline.py | 49 +++ new_tests/build_RuleBasedFilter_pipline.py | 54 +++ new_tests/build_SOD_pipeline.py | 49 +++ .../build_SimpleExponentialSmoothing_pipline.py | 76 ++++ new_tests/build_Standardize_pipline.py | 49 +++ new_tests/build_TRMF_pipline.py | 44 +++ new_tests/build_Telemanom.py | 48 +++ new_tests/build_TimeIntervalTransform_pipeline.py | 86 +++++ new_tests/build_TruncatedSVD_pipline.py | 44 +++ new_tests/build_VariationalAutoEncoder.py | 67 ++++ new_tests/build_WaveletTransform_pipline.py | 64 ++++ .../build_test_detection_algorithm_PyodMoGaal.py | 50 +++ .../build_test_detection_algorithm_PyodSoGaal.py | 50 +++ ...nalysis_spectral_residual_transform_pipeline.py | 61 ++++ ...test_feature_analysis_statistical_abs_energy.py | 62 ++++ ...ld_test_feature_analysis_statistical_abs_sum.py | 62 ++++ ...uild_test_feature_analysis_statistical_gmean.py | 62 ++++ ...uild_test_feature_analysis_statistical_hmean.py | 62 ++++ ...d_test_feature_analysis_statistical_kurtosis.py | 62 ++++ ...ld_test_feature_analysis_statistical_maximum.py | 62 ++++ ...build_test_feature_analysis_statistical_mean.py | 62 ++++ ...d_test_feature_analysis_statistical_mean_abs.py | 62 ++++ ...sis_statistical_mean_abs_temporal_derivative.py | 62 ++++ ...nalysis_statistical_mean_temporal_derivative.py | 62 ++++ ...ild_test_feature_analysis_statistical_median.py | 62 ++++ ...alysis_statistical_median_absolute_deviation.py | 63 ++++ ...ld_test_feature_analysis_statistical_minimum.py | 62 ++++ ...build_test_feature_analysis_statistical_skew.py | 62 ++++ .../build_test_feature_analysis_statistical_std.py | 62 ++++ .../build_test_feature_analysis_statistical_var.py | 62 ++++ ..._test_feature_analysis_statistical_variation.py | 62 ++++ ...ld_test_feature_analysis_statistical_vec_sum.py | 62 ++++ ...ture_analysis_statistical_willison_amplitude.py | 62 ++++ ...t_feature_analysis_statistical_zero_crossing.py | 62 ++++ ..._time_series_seasonality_trend_decomposition.py | 61 ++++ test.sh | 2 +- tods/data_processing/ColumnParser.py | 398 +++++++++++++++++++++ tods/data_processing/ContructPredictions.py | 261 ++++++++++++++ .../ExtractColumnsBySemanticTypes.py | 141 ++++++++ tods/resources/.entry_points.ini | 4 +- 71 files changed, 4667 insertions(+), 2 deletions(-) create mode 100644 new_tests/build_ABOD_pipline.py create mode 100644 
new_tests/build_AutoEncoder.py create mode 100644 new_tests/build_AutoRegODetect_pipeline.py create mode 100644 new_tests/build_AxiswiseScale_pipline.py create mode 100644 new_tests/build_BKFilter_pipline.py create mode 100644 new_tests/build_CBLOF_pipline.py create mode 100644 new_tests/build_CategoricalToBinary.py create mode 100644 new_tests/build_ColumnFilter_pipeline.py create mode 100644 new_tests/build_ContinuityValidation_pipline.py create mode 100644 new_tests/build_DeepLog_pipeline.py create mode 100644 new_tests/build_DiscreteCosineTransform.py create mode 100644 new_tests/build_DuplicationValidation_pipline.py create mode 100644 new_tests/build_FastFourierTransform.py create mode 100644 new_tests/build_HBOS_pipline.py create mode 100644 new_tests/build_HBOS_score_pipline.py create mode 100644 new_tests/build_HPFilter_pipline.py create mode 100644 new_tests/build_HoltSmoothing_pipline.py create mode 100644 new_tests/build_HoltWintersExponentialSmoothing_pipline.py create mode 100644 new_tests/build_IsolationForest_pipline.py create mode 100644 new_tests/build_KDiscord_pipeline.py create mode 100644 new_tests/build_KNN_pipline.py create mode 100644 new_tests/build_LODA_pipline.py create mode 100644 new_tests/build_LOF_pipline.py create mode 100644 new_tests/build_LSTMOD_pipline.py create mode 100644 new_tests/build_MatrixProfile_pipeline.py create mode 100644 new_tests/build_MeanAverageTransform_pipline.py create mode 100644 new_tests/build_NonNegativeMatrixFactorization.py create mode 100644 new_tests/build_OCSVM_pipline.py create mode 100644 new_tests/build_PCAODetect_pipeline.py create mode 100644 new_tests/build_PowerTransform_pipline.py create mode 100644 new_tests/build_PyodCOF.py create mode 100644 new_tests/build_QuantileTransform_pipline.py create mode 100644 new_tests/build_RuleBasedFilter_pipline.py create mode 100644 new_tests/build_SOD_pipeline.py create mode 100644 new_tests/build_SimpleExponentialSmoothing_pipline.py create mode 100644 new_tests/build_Standardize_pipline.py create mode 100644 new_tests/build_TRMF_pipline.py create mode 100644 new_tests/build_Telemanom.py create mode 100644 new_tests/build_TimeIntervalTransform_pipeline.py create mode 100644 new_tests/build_TruncatedSVD_pipline.py create mode 100644 new_tests/build_VariationalAutoEncoder.py create mode 100644 new_tests/build_WaveletTransform_pipline.py create mode 100644 new_tests/build_test_detection_algorithm_PyodMoGaal.py create mode 100644 new_tests/build_test_detection_algorithm_PyodSoGaal.py create mode 100644 new_tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py create mode 100644 new_tests/build_test_feature_analysis_statistical_abs_energy.py create mode 100644 new_tests/build_test_feature_analysis_statistical_abs_sum.py create mode 100644 new_tests/build_test_feature_analysis_statistical_gmean.py create mode 100644 new_tests/build_test_feature_analysis_statistical_hmean.py create mode 100644 new_tests/build_test_feature_analysis_statistical_kurtosis.py create mode 100644 new_tests/build_test_feature_analysis_statistical_maximum.py create mode 100644 new_tests/build_test_feature_analysis_statistical_mean.py create mode 100644 new_tests/build_test_feature_analysis_statistical_mean_abs.py create mode 100644 new_tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py create mode 100644 new_tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py create mode 100644 new_tests/build_test_feature_analysis_statistical_median.py create 
mode 100644 new_tests/build_test_feature_analysis_statistical_median_absolute_deviation.py create mode 100644 new_tests/build_test_feature_analysis_statistical_minimum.py create mode 100644 new_tests/build_test_feature_analysis_statistical_skew.py create mode 100644 new_tests/build_test_feature_analysis_statistical_std.py create mode 100644 new_tests/build_test_feature_analysis_statistical_var.py create mode 100644 new_tests/build_test_feature_analysis_statistical_variation.py create mode 100644 new_tests/build_test_feature_analysis_statistical_vec_sum.py create mode 100644 new_tests/build_test_feature_analysis_statistical_willison_amplitude.py create mode 100644 new_tests/build_test_feature_analysis_statistical_zero_crossing.py create mode 100644 new_tests/build_test_time_series_seasonality_trend_decomposition.py create mode 100644 tods/data_processing/ColumnParser.py create mode 100644 tods/data_processing/ContructPredictions.py create mode 100644 tods/data_processing/ExtractColumnsBySemanticTypes.py diff --git a/new_tests/build_ABOD_pipline.py b/new_tests/build_ABOD_pipline.py new file mode 100644 index 0000000..5faccc2 --- /dev/null +++ b/new_tests/build_ABOD_pipline.py @@ -0,0 +1,70 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# Step 3: extract_columns_by_semantic_types(targets) +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_3.add_output('produce') +step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +pipeline_description.add_step(step_3) + +attributes = 'steps.2.produce' +targets = 'steps.3.produce' + +# Step 4: imputer +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_4.add_output('produce') +pipeline_description.add_step(step_4) + +# Step 5: ABOD +step_5 = 
PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_abod')) +step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') + +step_5.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_5.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 4,)) +step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='replace') + +step_5.add_output('produce') +pipeline_description.add_step(step_5) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_AutoEncoder.py b/new_tests/build_AutoEncoder.py new file mode 100644 index 0000000..7482be5 --- /dev/null +++ b/new_tests/build_AutoEncoder.py @@ -0,0 +1,67 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# Step 3: extract_columns_by_semantic_types(targets) +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_3.add_output('produce') +step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +pipeline_description.add_step(step_3) + +attributes = 'steps.2.produce' +targets = 'steps.3.produce' + +# Step 4: imputer +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_4.add_output('produce') 
+pipeline_description.add_step(step_4) + +# Step 5: auto encoder +step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ae')) +step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_5.add_output('produce') +pipeline_description.add_step(step_5) + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_AutoRegODetect_pipeline.py b/new_tests/build_AutoRegODetect_pipeline.py new file mode 100644 index 0000000..e6debfa --- /dev/null +++ b/new_tests/build_AutoRegODetect_pipeline.py @@ -0,0 +1,71 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import numpy as np + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# # Step 3: Standardization +primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,)) +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + +# # Step 4: test primitive +primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.AutoRegODetector') +step_4 = PrimitiveStep(primitive=primitive_4) +step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10) 
+# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray) +step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False) +# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional +step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce') +step_4.add_output('produce') +step_4.add_output('produce_score') +pipeline_description.add_step(step_4) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_AxiswiseScale_pipline.py b/new_tests/build_AxiswiseScale_pipline.py new file mode 100644 index 0000000..3352f48 --- /dev/null +++ b/new_tests/build_AxiswiseScale_pipline.py @@ -0,0 +1,50 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_BKFilter_pipline.py b/new_tests/build_BKFilter_pipline.py new file mode 100644 index 0000000..c2b306f --- /dev/null +++ 
b/new_tests/build_BKFilter_pipline.py @@ -0,0 +1,44 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + + +# Step 2: BKFilter +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.bk_filter')) +# step_2.add_hyperparameter(name = 'columns_using_method', argument_type=ArgumentType.VALUE, data = 'name') +step_2.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True) +step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2,3)) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_CBLOF_pipline.py b/new_tests/build_CBLOF_pipline.py new file mode 100644 index 0000000..2180b6d --- /dev/null +++ b/new_tests/build_CBLOF_pipline.py @@ -0,0 +1,51 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cblof') + +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) 
+step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_CategoricalToBinary.py b/new_tests/build_CategoricalToBinary.py new file mode 100644 index 0000000..9f9782e --- /dev/null +++ b/new_tests/build_CategoricalToBinary.py @@ -0,0 +1,48 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: Column Parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: Categorical to Binary +primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.categorical_to_binary') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(3,)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_ColumnFilter_pipeline.py b/new_tests/build_ColumnFilter_pipeline.py new file mode 100644 index 0000000..3dd3be3 --- /dev/null +++ b/new_tests/build_ColumnFilter_pipeline.py @@ -0,0 +1,49 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() 
+pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +#Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.auto_correlation') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True) +step_2.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3)) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +primitive_3 = index.get_primitive('d3m.primitives.tods.data_processing.column_filter') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_ContinuityValidation_pipline.py b/new_tests/build_ContinuityValidation_pipline.py new file mode 100644 index 0000000..3b76d84 --- /dev/null +++ b/new_tests/build_ContinuityValidation_pipline.py @@ -0,0 +1,43 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + + +# Step 2: ContinuityValidation +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.continuity_validation')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name = 'continuity_option', argument_type=ArgumentType.VALUE, data = 'imputation') +step_2.add_hyperparameter(name = 'interval', argument_type=ArgumentType.VALUE, data = 0.3) +# Or: +# step_2.add_hyperparameter(name = 'continuity_option', argument_type=ArgumentType.VALUE, data = 'ablation') 
+pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_DeepLog_pipeline.py b/new_tests/build_DeepLog_pipeline.py new file mode 100644 index 0000000..110c6d3 --- /dev/null +++ b/new_tests/build_DeepLog_pipeline.py @@ -0,0 +1,49 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.deeplog') + +step_2 = PrimitiveStep(primitive=primitive_2) +#step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# # Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_DiscreteCosineTransform.py b/new_tests/build_DiscreteCosineTransform.py new file mode 100644 index 0000000..c052207 --- /dev/null +++ b/new_tests/build_DiscreteCosineTransform.py @@ -0,0 +1,50 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = 
PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: Column Parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + + +# Step 2: Discrete Cosine Transform +primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.discrete_cosine_transform') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_DuplicationValidation_pipline.py b/new_tests/build_DuplicationValidation_pipline.py new file mode 100644 index 0000000..57673d2 --- /dev/null +++ b/new_tests/build_DuplicationValidation_pipline.py @@ -0,0 +1,42 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + + +# Step 2: DuplicationValidation +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.duplication_validation')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name = 'keep_option', argument_type=ArgumentType.VALUE, data = 'average') # Or: 'first' +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_FastFourierTransform.py b/new_tests/build_FastFourierTransform.py new file mode 100644 index 0000000..5c7f083 --- /dev/null +++ 
b/new_tests/build_FastFourierTransform.py @@ -0,0 +1,48 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: Column Parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: Fast Fourier Transform +primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.fast_fourier_transform') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_HBOS_pipline.py b/new_tests/build_HBOS_pipline.py new file mode 100644 index 0000000..b281ba0 --- /dev/null +++ b/new_tests/build_HBOS_pipline.py @@ -0,0 +1,68 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', 
argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# Step 3: extract_columns_by_semantic_types(targets) +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_3.add_output('produce') +step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +pipeline_description.add_step(step_3) + +attributes = 'steps.2.produce' +targets = 'steps.3.produce' + +# Step 4: imputer +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_4.add_output('produce') +pipeline_description.add_step(step_4) + +# Step 5: HBOS +step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos')) +step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') + +step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +# step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') + +step_5.add_output('produce') +pipeline_description.add_step(step_5) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_HBOS_score_pipline.py b/new_tests/build_HBOS_score_pipline.py new file mode 100644 index 0000000..b389a1e --- /dev/null +++ b/new_tests/build_HBOS_score_pipline.py @@ -0,0 +1,71 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# Step 3: extract_columns_by_semantic_types(targets) +step_3 = 
PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_3.add_output('produce') +step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +pipeline_description.add_step(step_3) + +attributes = 'steps.2.produce' +targets = 'steps.3.produce' + +# Step 4: imputer +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_4.add_output('produce') +pipeline_description.add_step(step_4) + +# Step 5: HBOS +step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_hbos')) +step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce') + +step_5.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_5.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True) +# step_5.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') + +step_5.add_output('produce_score') +step_5.add_output('produce') +pipeline_description.add_step(step_5) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') +# pipeline_description.add_output(name='output score', data_reference='steps.5.produce_score') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_HPFilter_pipline.py b/new_tests/build_HPFilter_pipline.py new file mode 100644 index 0000000..355c076 --- /dev/null +++ b/new_tests/build_HPFilter_pipline.py @@ -0,0 +1,46 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + + +# Step 2: HPFilter +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.hp_filter')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') + +step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = [2,3,6]) + +step_2.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True) +step_2.add_hyperparameter(name = 'return_result', argument_type=ArgumentType.VALUE, data = 'append') +pipeline_description.add_step(step_2) + +# Final 
Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_HoltSmoothing_pipline.py b/new_tests/build_HoltSmoothing_pipline.py new file mode 100644 index 0000000..8f8a31e --- /dev/null +++ b/new_tests/build_HoltSmoothing_pipline.py @@ -0,0 +1,76 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# Step 3: extract_columns_by_semantic_types(targets) +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_3.add_output('produce') +step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +pipeline_description.add_step(step_3) + +attributes = 'steps.2.produce' +targets = 'steps.3.produce' + +# Step 4: imputer +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_4.add_output('produce') +pipeline_description.add_step(step_4) + +# Step 5: holt smoothing +step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.holt_smoothing')) +step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_5.add_hyperparameter(name="exclude_columns", argument_type=ArgumentType.VALUE, data = (2, 3)) +step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True) +step_5.add_output('produce') +pipeline_description.add_step(step_5) + +# Step 6: isolation forest +#step_6 = 
PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm')) +#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') +#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets) +#step_6.add_output('produce') +#pipeline_description.add_step(step_6) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_HoltWintersExponentialSmoothing_pipline.py b/new_tests/build_HoltWintersExponentialSmoothing_pipline.py new file mode 100644 index 0000000..6ede370 --- /dev/null +++ b/new_tests/build_HoltWintersExponentialSmoothing_pipline.py @@ -0,0 +1,76 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# Step 3: extract_columns_by_semantic_types(targets) +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_3.add_output('produce') +step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +pipeline_description.add_step(step_3) + +attributes = 'steps.2.produce' +targets = 'steps.3.produce' + +# Step 4: imputer +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_4.add_output('produce') +pipeline_description.add_step(step_4) + +# Step 5: holt winters exponential smoothing +step_5 = 
PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.holt_winters_exponential_smoothing')) +step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3)) +step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True) +step_5.add_output('produce') +pipeline_description.add_step(step_5) + +# Step 6: isolation forest +#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm')) +#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') +#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets) +#step_6.add_output('produce') +#pipeline_description.add_step(step_6) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_IsolationForest_pipline.py b/new_tests/build_IsolationForest_pipline.py new file mode 100644 index 0000000..80923c9 --- /dev/null +++ b/new_tests/build_IsolationForest_pipline.py @@ -0,0 +1,59 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +primitive_3 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_iforest') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +# step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +# step_3.add_hyperparameter(name='use_columns', 
argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce_score') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce_score') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_KDiscord_pipeline.py b/new_tests/build_KDiscord_pipeline.py new file mode 100644 index 0000000..09d6a7c --- /dev/null +++ b/new_tests/build_KDiscord_pipeline.py @@ -0,0 +1,71 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import numpy as np + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# # Step 3: Standardization +primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,)) +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + +# # Step 4: test primitive +primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.KDiscordODetector') +step_4 = PrimitiveStep(primitive=primitive_4) 
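# Hedged note on the hyperparameters set just below (a reading based on how the other
# detection-algorithm steps in these scripts use them): 'contamination' is the assumed
# fraction of anomalies used to turn scores into labels, 'window_size' is the length of
# the sliding subsequence KDiscordODetector scores, and 'return_subseq_inds' appears to
# ask the primitive to emit the subsequence indices along with its scores.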
+step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10) +# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray) +step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False) +# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional +step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce') +step_4.add_output('produce') +step_4.add_output('produce_score') +pipeline_description.add_step(step_4) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_KNN_pipline.py b/new_tests/build_KNN_pipline.py new file mode 100644 index 0000000..8b31557 --- /dev/null +++ b/new_tests/build_KNN_pipline.py @@ -0,0 +1,51 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_knn') + +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as 
f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_LODA_pipline.py b/new_tests/build_LODA_pipline.py new file mode 100644 index 0000000..05b022d --- /dev/null +++ b/new_tests/build_LODA_pipline.py @@ -0,0 +1,51 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_loda') + +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_LOF_pipline.py b/new_tests/build_LOF_pipline.py new file mode 100644 index 0000000..ec444cf --- /dev/null +++ b/new_tests/build_LOF_pipline.py @@ -0,0 +1,51 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = 
index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_lof') + +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_LSTMOD_pipline.py b/new_tests/build_LSTMOD_pipline.py new file mode 100644 index 0000000..3575904 --- /dev/null +++ b/new_tests/build_LSTMOD_pipline.py @@ -0,0 +1,70 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import numpy as np + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# # Step 2: Standardization +primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) 
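# Hedged note: with 'use_semantic_types' set to True the scaler picks its operating
# columns from column metadata, 'use_columns' below then pins it to explicit column
# indices, and 'return_result' = 'new' keeps only the transformed columns rather than
# appending them to the input dataframe (the convention the rest of these scripts use).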
+step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,)) +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + +# # Step 3: test primitive +primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.LSTMODetector') +step_4 = PrimitiveStep(primitive=primitive_4) +step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_4.add_hyperparameter(name='diff_group_method', argument_type=ArgumentType.VALUE, data='average') +step_4.add_hyperparameter(name='feature_dim', argument_type=ArgumentType.VALUE, data=5) +step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False) +# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional +step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce') +step_4.add_output('produce') +pipeline_description.add_step(step_4) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_MatrixProfile_pipeline.py b/new_tests/build_MatrixProfile_pipeline.py new file mode 100644 index 0000000..458823e --- /dev/null +++ b/new_tests/build_MatrixProfile_pipeline.py @@ -0,0 +1,49 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.matrix_profile') + +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # There is sth wrong with multi-dimensional 
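# Hedged note: 'window_size' below is the subsequence length the matrix-profile
# detector slides over each series; 3 keeps this test tiny, and longer windows would
# normally be used on real data.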
+step_2.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=3) # There is sth wrong with multi-dimensional +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# # Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_MeanAverageTransform_pipline.py b/new_tests/build_MeanAverageTransform_pipline.py new file mode 100644 index 0000000..43bf392 --- /dev/null +++ b/new_tests/build_MeanAverageTransform_pipline.py @@ -0,0 +1,77 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# Step 3: extract_columns_by_semantic_types(targets) +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_3.add_output('produce') +step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +pipeline_description.add_step(step_3) + +attributes = 'steps.2.produce' +targets = 'steps.3.produce' + +# Step 4: imputer +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_4.add_output('produce') +pipeline_description.add_step(step_4) + +# Step 5: mean average transform +step_5 = 
PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.moving_average_transform')) +step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (2, 3)) +step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True) +step_5.add_output('produce') +pipeline_description.add_step(step_5) + +# Step 6: isolation forest +#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm')) +#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') +#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets) +#step_6.add_output('produce') +#pipeline_description.add_step(step_6) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_NonNegativeMatrixFactorization.py b/new_tests/build_NonNegativeMatrixFactorization.py new file mode 100644 index 0000000..787013c --- /dev/null +++ b/new_tests/build_NonNegativeMatrixFactorization.py @@ -0,0 +1,50 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: Column Parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: Non Negative Matrix Factorization +primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.non_negative_matrix_factorization') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_hyperparameter(name='rank', argument_type=ArgumentType.VALUE, data=5) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) 
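# Optional round-trip check (hedged sketch; assumes PyYAML, which d3m already depends
# on, is importable). Reloading the file just written confirms the YAML parses and
# should expose generated pipeline fields such as 'id' and 'steps'.
# import yaml as pyyaml
# with open('pipeline.yml') as f_in:
#     parsed = pyyaml.safe_load(f_in)
# print(sorted(parsed.keys()))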
+print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_OCSVM_pipline.py b/new_tests/build_OCSVM_pipline.py new file mode 100644 index 0000000..d8cd8c9 --- /dev/null +++ b/new_tests/build_OCSVM_pipline.py @@ -0,0 +1,51 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_ocsvm') + +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_PCAODetect_pipeline.py b/new_tests/build_PCAODetect_pipeline.py new file mode 100644 index 0000000..327cacd --- /dev/null +++ b/new_tests/build_PCAODetect_pipeline.py @@ -0,0 +1,71 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import numpy as np + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser 
+primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# # Step 3: Standardization +primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(1,2,3,4,5,)) +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + +# # Step 4: test primitive +primitive_4 = index.get_primitive('d3m.primitives.tods.detection_algorithm.PCAODetector') +step_4 = PrimitiveStep(primitive=primitive_4) +step_4.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_4.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=10) +# step_4.add_hyperparameter(name='weights', argument_type=ArgumentType.VALUE, data=weights_ndarray) +step_4.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False) +# step_4.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) # There is sth wrong with multi-dimensional +step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_4.add_hyperparameter(name='return_subseq_inds', argument_type=ArgumentType.VALUE, data=True) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce') +step_4.add_output('produce') +step_4.add_output('produce_score') +pipeline_description.add_step(step_4) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.4.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_PowerTransform_pipline.py b/new_tests/build_PowerTransform_pipline.py new file mode 100644 index 0000000..b855dc7 --- /dev/null +++ b/new_tests/build_PowerTransform_pipline.py @@ -0,0 +1,49 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() 
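# Hedged note: the single pipeline input declared below is the raw D3M Dataset
# container; step_0 then consumes it through the 'inputs.0' data reference, mirroring
# the other build_*_pipline.py scripts in this directory.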
+pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.power_transformer') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_PyodCOF.py b/new_tests/build_PyodCOF.py new file mode 100644 index 0000000..fcd0d2b --- /dev/null +++ b/new_tests/build_PyodCOF.py @@ -0,0 +1,51 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_cof') + +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # There is sth wrong with multi-dimensional 
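# Hedged note: 'return_result' = 'append' below asks the COF primitive to append its
# output columns to the incoming dataframe instead of replacing it, so the final
# 'steps.2.produce' reference still carries the parsed input columns.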
+step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_QuantileTransform_pipline.py b/new_tests/build_QuantileTransform_pipline.py new file mode 100644 index 0000000..f6c4868 --- /dev/null +++ b/new_tests/build_QuantileTransform_pipline.py @@ -0,0 +1,49 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.quantile_transformer') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_RuleBasedFilter_pipline.py b/new_tests/build_RuleBasedFilter_pipline.py new file mode 100644 index 0000000..87a74b9 --- /dev/null +++ b/new_tests/build_RuleBasedFilter_pipline.py @@ -0,0 +1,54 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) 
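# Hedged note on the rule_filter step further down in this script: the rule string
# '#4# % 2 == 0 and #2# <= 0.3' appears to substitute the value of column N for each
# '#N#' placeholder, with the usable indices restricted by 'use_columns' = (2, 4).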
+step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + + +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.reinforcement.rule_filter')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') + +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2, 4,)) +step_3.add_hyperparameter(name='rule', argument_type=ArgumentType.VALUE, data='#4# % 2 == 0 and #2# <= 0.3') + +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +pipeline_description.add_step(step_3) + + + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_SOD_pipeline.py b/new_tests/build_SOD_pipeline.py new file mode 100644 index 0000000..e4ed1b3 --- /dev/null +++ b/new_tests/build_SOD_pipeline.py @@ -0,0 +1,49 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sod') + +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_2.add_hyperparameter(name='use_semantic_types', 
argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4)) # There is sth wrong with multi-dimensional +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# # Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_SimpleExponentialSmoothing_pipline.py b/new_tests/build_SimpleExponentialSmoothing_pipline.py new file mode 100644 index 0000000..b33db22 --- /dev/null +++ b/new_tests/build_SimpleExponentialSmoothing_pipline.py @@ -0,0 +1,76 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# Step 3: extract_columns_by_semantic_types(targets) +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_3.add_output('produce') +step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +pipeline_description.add_step(step_3) + +attributes = 'steps.2.produce' +targets = 'steps.3.produce' + +# Step 4: imputer +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_4.add_output('produce') +pipeline_description.add_step(step_4) + +# Step 5: simple exponential smoothing +step_5 = 
PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.simple_exponential_smoothing')) +step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_5.add_hyperparameter(name="use_columns", argument_type=ArgumentType.VALUE, data = (1,)) +step_5.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data = True) +step_5.add_output('produce') +pipeline_description.add_step(step_5) + +# Step 6: isolation forest +#step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.anomaly_detection.isolation_forest.Algorithm')) +#step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') +#step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets) +#step_6.add_output('produce') +#pipeline_description.add_step(step_6) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_Standardize_pipline.py b/new_tests/build_Standardize_pipline.py new file mode 100644 index 0000000..8300d7c --- /dev/null +++ b/new_tests/build_Standardize_pipline.py @@ -0,0 +1,49 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() + 
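All of these scripts share the same serialization epilogue, and each leaves the JSON variant commented out. A minimal hedged sketch of that path, assuming d3m's Pipeline.to_json(), like to_yaml(), returns a string when called without a file argument; the helper name dump_pipeline is illustrative and not part of the repo:

def dump_pipeline(pipeline_description, yaml_path='pipeline.yml', json_path='pipeline.json'):
    # Write the YAML form, exactly as the scripts above and below do.
    with open(yaml_path, 'w') as f:
        f.write(pipeline_description.to_yaml())
    # Also write the JSON form; note the variable is pipeline_description (the
    # commented-out lines in these scripts spell it 'pipline_description').
    with open(json_path, 'w') as f:
        f.write(pipeline_description.to_json())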
diff --git a/new_tests/build_TRMF_pipline.py b/new_tests/build_TRMF_pipline.py new file mode 100644 index 0000000..7d7c407 --- /dev/null +++ b/new_tests/build_TRMF_pipline.py @@ -0,0 +1,44 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + + +# Step 2: TRMF +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.trmf')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') + +step_2.add_hyperparameter(name = 'lags', argument_type=ArgumentType.VALUE, data = [1,2,10,100]) +# step_2.add_hyperparameter(name = 'K', argument_type=ArgumentType.VALUE, data = 3) +# step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2, 3, 4, 5, 6)) + +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_Telemanom.py b/new_tests/build_Telemanom.py new file mode 100644 index 0000000..06a192c --- /dev/null +++ b/new_tests/build_Telemanom.py @@ -0,0 +1,48 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: Column Parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: Fast Fourier Transform +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.telemanom') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', 
argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_TimeIntervalTransform_pipeline.py b/new_tests/build_TimeIntervalTransform_pipeline.py new file mode 100644 index 0000000..be7990f --- /dev/null +++ b/new_tests/build_TimeIntervalTransform_pipeline.py @@ -0,0 +1,86 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: dataframe transformation +# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKPowerTransformer') +# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKStandardization') +# primitive_1 = index.get_primitive('d3m.primitives.data_transformation.SKQuantileTransformer') + +#Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +primitive_2 = index.get_primitive('d3m.primitives.tods.data_processing.time_interval_transform') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name="time_interval", argument_type=ArgumentType.VALUE, data = '5T') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) +# +# # Step 2: column_parser +# step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +# step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +# step_2.add_output('produce') +# pipeline_description.add_step(step_2) +# +# +# # Step 3: extract_columns_by_semantic_types(attributes) +# step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +# step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +# step_3.add_output('produce') +# step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, +# data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +# pipeline_description.add_step(step_3) +# +# # 
Step 4: extract_columns_by_semantic_types(targets) +# step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +# step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +# step_4.add_output('produce') +# step_4.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, +# data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +# pipeline_description.add_step(step_4) +# +# attributes = 'steps.3.produce' +# targets = 'steps.4.produce' +# +# # Step 5: imputer +# step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_cleaning.imputer.SKlearn')) +# step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +# step_5.add_output('produce') +# pipeline_description.add_step(step_5) +# +# # Step 6: random_forest +# step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.regression.random_forest.SKlearn')) +# step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce') +# step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets) +# step_6.add_output('produce') +# pipeline_description.add_step(step_6) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.1.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_TruncatedSVD_pipline.py b/new_tests/build_TruncatedSVD_pipline.py new file mode 100644 index 0000000..290f181 --- /dev/null +++ b/new_tests/build_TruncatedSVD_pipline.py @@ -0,0 +1,44 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + + +# Step 2: TruncatedSVD +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.truncated_svd')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name = 'n_components', argument_type=ArgumentType.VALUE, data = 3) +step_2.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = (2, 3, 4, 5, 6)) +step_2.add_hyperparameter(name = 'return_result', argument_type=ArgumentType.VALUE, data = 'append') +step_2.add_hyperparameter(name = 'use_semantic_types', argument_type=ArgumentType.VALUE, data = True) +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML 
+yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipeline_description.to_json() + diff --git a/new_tests/build_VariationalAutoEncoder.py b/new_tests/build_VariationalAutoEncoder.py new file mode 100644 index 0000000..e585a0a --- /dev/null +++ b/new_tests/build_VariationalAutoEncoder.py @@ -0,0 +1,67 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# Step 1: column_parser +step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser')) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# Step 2: extract_columns_by_semantic_types(attributes) +step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/Attribute']) +pipeline_description.add_step(step_2) + +# Step 3: extract_columns_by_semantic_types(targets) +step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types')) +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_3.add_output('produce') +step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, + data=['https://metadata.datadrivendiscovery.org/types/TrueTarget']) +pipeline_description.add_step(step_3) + +attributes = 'steps.2.produce' +targets = 'steps.3.produce' + +# Step 4: imputer +step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.impute_missing')) +step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_4.add_output('produce') +pipeline_description.add_step(step_4) + +# Step 5: variational auto encoder +step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_vae')) +step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes) +step_5.add_output('produce') +pipeline_description.add_step(step_5) + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipeline_description.to_json() + diff --git a/new_tests/build_WaveletTransform_pipline.py b/new_tests/build_WaveletTransform_pipline.py
new file mode 100644 index 0000000..ee6c766 --- /dev/null +++ b/new_tests/build_WaveletTransform_pipline.py @@ -0,0 +1,64 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test WaveletTransform +primitive_2 = index.get_primitive('d3m.primitives.tods.feature_analysis.wavelet_transform') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='wavelet', argument_type=ArgumentType.VALUE, data='db8') +step_2.add_hyperparameter(name='level', argument_type=ArgumentType.VALUE, data=2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test inverse WaveletTransform +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.wavelet_transform') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='wavelet', argument_type=ArgumentType.VALUE, data='db8') +step_3.add_hyperparameter(name='level', argument_type=ArgumentType.VALUE, data=2) +step_3.add_hyperparameter(name='inverse', argument_type=ArgumentType.VALUE, data=1) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=False) +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipeline_description.to_json() + diff --git a/new_tests/build_test_detection_algorithm_PyodMoGaal.py b/new_tests/build_test_detection_algorithm_PyodMoGaal.py new file mode 100644 index 0000000..713a2cd --- /dev/null +++ b/new_tests/build_test_detection_algorithm_PyodMoGaal.py @@ -0,0 +1,50 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from
d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_mogaal') + +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_test_detection_algorithm_PyodSoGaal.py b/new_tests/build_test_detection_algorithm_PyodSoGaal.py new file mode 100644 index 0000000..4caa752 --- /dev/null +++ b/new_tests/build_test_detection_algorithm_PyodSoGaal.py @@ -0,0 +1,50 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + 
+# # Step 2: test primitive +primitive_2 = index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_sogaal') + +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='contamination', argument_type=ArgumentType.VALUE, data=0.1) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,)) # There is sth wrong with multi-dimensional +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# Final Output +pipeline_description.add_output(name='output predictions', data_reference='steps.2.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) +print(yaml) + +# Or you can output json +#data = pipline_description.to_json() diff --git a/new_tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py b/new_tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py new file mode 100644 index 0000000..5fbd61e --- /dev/null +++ b/new_tests/build_test_feature_analysis_spectral_residual_transform_pipeline.py @@ -0,0 +1,61 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.spectral_residual_transform') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='avg_filter_dimension', 
argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # There is sth wrong with multi-dimensional +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_abs_energy.py b/new_tests/build_test_feature_analysis_statistical_abs_energy.py new file mode 100644 index 0000000..cb28366 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_abs_energy.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_abs_energy') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') 
+step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_abs_sum.py b/new_tests/build_test_feature_analysis_statistical_abs_sum.py new file mode 100644 index 0000000..91b3d42 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_abs_sum.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_abs_sum') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = 
pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_gmean.py b/new_tests/build_test_feature_analysis_statistical_gmean.py new file mode 100644 index 0000000..5d54b3c --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_gmean.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_g_mean') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_hmean.py b/new_tests/build_test_feature_analysis_statistical_hmean.py new file mode 100644 index 0000000..01f9dbb --- 
/dev/null +++ b/new_tests/build_test_feature_analysis_statistical_hmean.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_h_mean') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_kurtosis.py b/new_tests/build_test_feature_analysis_statistical_kurtosis.py new file mode 100644 index 0000000..3276152 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_kurtosis.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> 
extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_kurtosis') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_maximum.py b/new_tests/build_test_feature_analysis_statistical_maximum.py new file mode 100644 index 0000000..900a5c1 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_maximum.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = 
index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_mean.py b/new_tests/build_test_feature_analysis_statistical_mean.py new file mode 100644 index 0000000..29c7bb0 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_mean.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = 
index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_mean_abs.py b/new_tests/build_test_feature_analysis_statistical_mean_abs.py new file mode 100644 index 0000000..6be3c45 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_mean_abs.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization 
+primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_abs') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py b/new_tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py new file mode 100644 index 0000000..15c12aa --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_mean_abs_temporal_derivative.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) 
+step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_abs_temporal_derivative') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py b/new_tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py new file mode 100644 index 0000000..d63dddb --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_mean_temporal_derivative.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', 
argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_mean_temporal_derivative') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_median.py b/new_tests/build_test_feature_analysis_statistical_median.py new file mode 100644 index 0000000..cefe002 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_median.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = 
index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_median') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_median_absolute_deviation.py b/new_tests/build_test_feature_analysis_statistical_median_absolute_deviation.py new file mode 100644 index 0000000..499a877 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_median_absolute_deviation.py @@ -0,0 +1,63 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_median_abs_deviation') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', 
argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_minimum.py b/new_tests/build_test_feature_analysis_statistical_minimum.py new file mode 100644 index 0000000..01c918d --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_minimum.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_minimum') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', 
argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_skew.py b/new_tests/build_test_feature_analysis_statistical_skew.py new file mode 100644 index 0000000..7ca113c --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_skew.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_skew') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + 
f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_std.py b/new_tests/build_test_feature_analysis_statistical_std.py new file mode 100644 index 0000000..66d3180 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_std.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_std') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_var.py b/new_tests/build_test_feature_analysis_statistical_var.py new file mode 100644 index 0000000..bd13e96 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_var.py @@ -0,0 +1,62 @@ 
+from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_var') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_variation.py b/new_tests/build_test_feature_analysis_statistical_variation.py new file mode 100644 index 0000000..5292e03 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_variation.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> 
^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_variation') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_vec_sum.py b/new_tests/build_test_feature_analysis_statistical_vec_sum.py new file mode 100644 index 0000000..fa8f99b --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_vec_sum.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', 
argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_vec_sum') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_willison_amplitude.py b/new_tests/build_test_feature_analysis_statistical_willison_amplitude.py new file mode 100644 index 0000000..f750dad --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_willison_amplitude.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) 
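+# (Editorial note, not part of the original script) Every step in these build scripts is
+# wired the same way: add_argument points 'inputs' at the previous step's 'produce'
+# reference, add_output('produce') exposes this step's result, and add_step registers it
+# with the pipeline.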
+step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_willison_amplitude') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='window_size', argument_type=ArgumentType.VALUE, data=4) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(5,6)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_feature_analysis_statistical_zero_crossing.py b/new_tests/build_test_feature_analysis_statistical_zero_crossing.py new file mode 100644 index 0000000..1c4efa1 --- /dev/null +++ b/new_tests/build_test_feature_analysis_statistical_zero_crossing.py @@ -0,0 +1,62 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = 
index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) +step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_zero_crossing') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(9,10)) # There is sth wrong with multi-dimensional +step_3.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/new_tests/build_test_time_series_seasonality_trend_decomposition.py b/new_tests/build_test_time_series_seasonality_trend_decomposition.py new file mode 100644 index 0000000..ab172bf --- /dev/null +++ b/new_tests/build_test_time_series_seasonality_trend_decomposition.py @@ -0,0 +1,61 @@ +from d3m import index +from d3m.metadata.base import ArgumentType +from d3m.metadata.pipeline import Pipeline, PrimitiveStep +from d3m.metadata import hyperparams +import copy + +# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest +# extract_columns_by_semantic_types(targets) -> ^ + +# Creating pipeline +pipeline_description = Pipeline() +pipeline_description.add_input(name='inputs') + +# Step 0: dataset_to_dataframe +primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe') +step_0 = PrimitiveStep(primitive=primitive_0) +step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') +step_0.add_output('produce') +pipeline_description.add_step(step_0) + +# # Step 1: column_parser +primitive_1 = index.get_primitive('d3m.primitives.tods.data_processing.column_parser') +step_1 = PrimitiveStep(primitive=primitive_1) +step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') +step_1.add_output('produce') +pipeline_description.add_step(step_1) + +# # Step 2: Standardization +primitive_2 = index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.standard_scaler') +step_2 = PrimitiveStep(primitive=primitive_2) +step_2.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_2.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(2,3,4,5,6)) 
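+# (Editorial note, not part of the original script) With use_semantic_types=True the scaler
+# only touches the listed column indices, and return_result='append' below keeps the original
+# columns and appends the scaled copies, which is presumably why step 3 later addresses
+# higher column indices.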
+step_2.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='append') +step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce') +step_2.add_output('produce') +pipeline_description.add_step(step_2) + +# # Step 3: test primitive +# primitive_3 = index.get_primitive('d3m.primitives.anomaly_detection.KNNPrimitive') +primitive_3 = index.get_primitive('d3m.primitives.tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition') +step_3 = PrimitiveStep(primitive=primitive_3) +step_3.add_hyperparameter(name='period', argument_type=ArgumentType.VALUE, data=5) +step_3.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True) +step_3.add_hyperparameter(name='use_columns', argument_type=ArgumentType.VALUE, data=(8,9,10,11,12)) # There is sth wrong with multi-dimensional +step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce') +step_3.add_output('produce') +pipeline_description.add_step(step_3) + + + +# Final Output +pipeline_description.add_output(name='output', data_reference='steps.3.produce') + +# Output to YAML +yaml = pipeline_description.to_yaml() +with open('pipeline.yml', 'w') as f: + f.write(yaml) + +# Or you can output json +#data = pipline_description.to_json() + diff --git a/test.sh b/test.sh index bcd4448..3481545 100644 --- a/test.sh +++ b/test.sh @@ -1,6 +1,6 @@ #!/bin/bash -test_scripts=$(ls primitive_tests) +test_scripts=$(ls new_tests) #test_scripts=$(ls primitive_tests | grep -v -f tested_file.txt) for file in $test_scripts diff --git a/tods/data_processing/ColumnParser.py b/tods/data_processing/ColumnParser.py new file mode 100644 index 0000000..c552ab5 --- /dev/null +++ b/tods/data_processing/ColumnParser.py @@ -0,0 +1,398 @@ +import hashlib +import os +import typing + +import numpy # type: ignore + +from d3m import container, utils as d3m_utils +from d3m.base import utils as base_utils +from d3m.metadata import base as metadata_base, hyperparams +from d3m.primitive_interfaces import base, transformer + +import common_primitives +from common_primitives import utils + +__all__ = ('ColumnParserPrimitive',) + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + parse_semantic_types = hyperparams.Set( + elements=hyperparams.Enumeration( + values=[ + 'http://schema.org/Boolean', 'https://metadata.datadrivendiscovery.org/types/CategoricalData', + 'http://schema.org/Integer', 'http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/FloatVector', 'http://schema.org/DateTime', + ], + # Default is ignored. + # TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141 + default='http://schema.org/Boolean', + ), + default=( + 'http://schema.org/Boolean', 'https://metadata.datadrivendiscovery.org/types/CategoricalData', + 'http://schema.org/Integer', 'http://schema.org/Float', + 'https://metadata.datadrivendiscovery.org/types/FloatVector', 'http://schema.org/DateTime', + ), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of semantic types to parse. 
One can provide a subset of supported semantic types to limit what the primitive parses.", + ) + use_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", + ) + exclude_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", + ) + return_result = hyperparams.Enumeration( + values=['append', 'replace', 'new'], + default='replace', + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned?", + ) + add_index_columns = hyperparams.UniformBool( + default=True, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", + ) + parse_categorical_target_columns = hyperparams.UniformBool( + default=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Should it parse also categorical target columns?", + ) + replace_index_columns = hyperparams.UniformBool( + default=True, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Replace primary index columns even if otherwise appending columns. Applicable only if \"return_result\" is set to \"append\".", + ) + fuzzy_time_parsing = hyperparams.UniformBool( + default=True, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Use fuzzy time parsing.", + ) + + +class ColumnParserPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive which parses strings into their parsed values. + + It goes over all columns (by default, controlled by ``use_columns``, ``exclude_columns``) + and checks those with structural type ``str`` if they have a semantic type suggesting + that they are a boolean value, categorical, integer, float, or time (by default, + controlled by ``parse_semantic_types``). Categorical values are converted with + hash encoding. + + What is returned is controlled by ``return_result`` and ``add_index_columns``. 
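+
+    For example (illustrative sketch mirroring the helper methods below, not additional
+    behaviour): a string column tagged ``http://schema.org/Float`` has each cell run through
+    ``float(value.strip())``, with unparseable cells becoming ``NaN``, while each distinct
+    categorical value is mapped to a 64-bit integer roughly as::
+
+        digest = hashlib.sha256(value.strip().encode('utf8')).digest()
+        code = (int.from_bytes(digest[0:8], 'little')
+                ^ int.from_bytes(digest[8:16], 'little')
+                ^ int.from_bytes(digest[16:24], 'little')
+                ^ int.from_bytes(digest[24:32], 'little'))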
+ """ + + metadata = metadata_base.PrimitiveMetadata( + { + 'id': 'd510cb7a-1782-4f51-b44c-58f0236e47c7', + 'version': '0.6.0', + 'name': "Parses strings into their types", + 'python_path': 'd3m.primitives.tods.data_processing.column_parser', + 'source': { + 'name': common_primitives.__author__, + 'contact': 'mailto:mitar.commonprimitives@tnode.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/column_parser.py', + 'https://gitlab.com/datadrivendiscovery/common-primitives.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format( + git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.DATA_CONVERSION, + ], + 'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, + }, + ) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + columns_to_use, output_columns = self._produce_columns(inputs) + + if self.hyperparams['replace_index_columns'] and self.hyperparams['return_result'] == 'append': + assert len(columns_to_use) == len(output_columns) + + index_columns = inputs.metadata.get_index_columns() + + index_columns_to_use = [] + other_columns_to_use = [] + index_output_columns = [] + other_output_columns = [] + for column_to_use, output_column in zip(columns_to_use, output_columns): + if column_to_use in index_columns: + index_columns_to_use.append(column_to_use) + index_output_columns.append(output_column) + else: + other_columns_to_use.append(column_to_use) + other_output_columns.append(output_column) + + outputs = base_utils.combine_columns(inputs, index_columns_to_use, index_output_columns, return_result='replace', add_index_columns=self.hyperparams['add_index_columns']) + outputs = base_utils.combine_columns(outputs, other_columns_to_use, other_output_columns, return_result='append', add_index_columns=self.hyperparams['add_index_columns']) + else: + outputs = base_utils.combine_columns(inputs, columns_to_use, output_columns, return_result=self.hyperparams['return_result'], add_index_columns=self.hyperparams['add_index_columns']) + + return base.CallResult(outputs) + + def _can_use_column(self, inputs_metadata: metadata_base.DataMetadata, column_index: int) -> bool: + column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) + + # We produce only on columns which have not yet been parsed (are strings). + if column_metadata['structural_type'] != str: + return False + + semantic_types = column_metadata.get('semantic_types', []) + + for semantic_type in self.hyperparams['parse_semantic_types']: + if semantic_type not in semantic_types: + continue + + if semantic_type == 'https://metadata.datadrivendiscovery.org/types/CategoricalData': + # Skip parsing if a column is categorical, but also a target column. + if not self.hyperparams['parse_categorical_target_columns'] and 'https://metadata.datadrivendiscovery.org/types/Target' in semantic_types: + continue + + return True + + return False + + def _produce_columns(self, inputs: Inputs) -> typing.Tuple[typing.List[int], typing.List[Outputs]]: + # The logic of parsing values tries to mirror also the logic of detecting + # values in "SimpleProfilerPrimitive". One should keep them in sync. 
+ + columns_to_use = self._get_columns(inputs.metadata) + + # We check against this list again, because there might be multiple matching semantic types + # (which is not really valid). + parse_semantic_types = self.hyperparams['parse_semantic_types'] + + output_columns = [] + + for column_index in columns_to_use: + column_metadata = inputs.metadata.query((metadata_base.ALL_ELEMENTS, column_index)) + semantic_types = column_metadata.get('semantic_types', []) + if column_metadata['structural_type'] == str: + if 'http://schema.org/Boolean' in parse_semantic_types and 'http://schema.org/Boolean' in semantic_types: + output_columns.append(self._parse_boolean_data(inputs, column_index)) + + elif 'https://metadata.datadrivendiscovery.org/types/CategoricalData' in parse_semantic_types and \ + 'https://metadata.datadrivendiscovery.org/types/CategoricalData' in semantic_types and \ + (self.hyperparams['parse_categorical_target_columns'] or 'https://metadata.datadrivendiscovery.org/types/Target' not in semantic_types): + output_columns.append(self._parse_categorical_data(inputs, column_index)) + + elif 'http://schema.org/Integer' in parse_semantic_types and 'http://schema.org/Integer' in semantic_types: + # For primary key we know all values have to exist so we can assume they can always be represented as integers. + if 'https://metadata.datadrivendiscovery.org/types/PrimaryKey' in semantic_types: + integer_required = True + else: + integer_required = False + + output_columns.append(self._parse_integer(inputs, column_index, integer_required)) + + elif 'http://schema.org/Float' in parse_semantic_types and 'http://schema.org/Float' in semantic_types: + output_columns.append(self._parse_float_data(inputs, column_index)) + + elif 'https://metadata.datadrivendiscovery.org/types/FloatVector' in parse_semantic_types and 'https://metadata.datadrivendiscovery.org/types/FloatVector' in semantic_types: + output_columns.append(self._parse_float_vector_data(inputs, column_index)) + + elif 'http://schema.org/DateTime' in parse_semantic_types and 'http://schema.org/DateTime' in semantic_types: + output_columns.append(self._parse_time_data(inputs, column_index, self.hyperparams['fuzzy_time_parsing'])) + + else: + assert False, column_index + + assert len(output_columns) == len(columns_to_use) + + return columns_to_use, output_columns + + def _produce_columns_metadata(self, inputs_metadata: metadata_base.DataMetadata) -> typing.Tuple[typing.List[int], typing.List[metadata_base.DataMetadata]]: + columns_to_use = self._get_columns(inputs_metadata) + + # We check against this list again, because there might be multiple matching semantic types + # (which is not really valid). 
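+        # (Editorial note) This is the metadata-only twin of _produce_columns: it never sees
+        # the values and only rewrites per-column metadata (for example structural_type) for
+        # the same selection of columns.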
+ parse_semantic_types = self.hyperparams['parse_semantic_types'] + + output_columns = [] + + for column_index in columns_to_use: + column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) + semantic_types = column_metadata.get('semantic_types', []) + if column_metadata['structural_type'] == str: + if 'http://schema.org/Boolean' in parse_semantic_types and 'http://schema.org/Boolean' in semantic_types: + output_columns.append(self._parse_boolean_metadata(inputs_metadata, column_index)) + + elif 'https://metadata.datadrivendiscovery.org/types/CategoricalData' in parse_semantic_types and \ + 'https://metadata.datadrivendiscovery.org/types/CategoricalData' in semantic_types and \ + (self.hyperparams['parse_categorical_target_columns'] or 'https://metadata.datadrivendiscovery.org/types/Target' not in semantic_types): + output_columns.append(self._parse_categorical_metadata(inputs_metadata, column_index)) + + elif 'http://schema.org/Integer' in parse_semantic_types and 'http://schema.org/Integer' in semantic_types: + output_columns.append(self._parse_integer_metadata(inputs_metadata, column_index)) + + elif 'http://schema.org/Float' in parse_semantic_types and 'http://schema.org/Float' in semantic_types: + output_columns.append(self._parse_float_metadata(inputs_metadata, column_index)) + + elif 'https://metadata.datadrivendiscovery.org/types/FloatVector' in parse_semantic_types and 'https://metadata.datadrivendiscovery.org/types/FloatVector' in semantic_types: + output_columns.append(self._parse_float_vector_metadata(inputs_metadata, column_index)) + + elif 'http://schema.org/DateTime' in parse_semantic_types and 'http://schema.org/DateTime' in semantic_types: + output_columns.append(self._parse_time_metadata(inputs_metadata, column_index)) + + else: + assert False, column_index + + assert len(output_columns) == len(columns_to_use) + + return columns_to_use, output_columns + + def _get_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.List[int]: + def can_use_column(column_index: int) -> bool: + return self._can_use_column(inputs_metadata, column_index) + + columns_to_use, columns_not_to_use = base_utils.get_columns_to_use(inputs_metadata, self.hyperparams['use_columns'], self.hyperparams['exclude_columns'], can_use_column) + + # We are OK if no columns ended up being parsed. + # "base_utils.combine_columns" will throw an error if it cannot work with this. + + if self.hyperparams['use_columns'] and columns_not_to_use: + self.logger.warning("Not all specified columns can parsed. 
Skipping columns: %(columns)s", { + 'columns': columns_not_to_use, + }) + + return columns_to_use + + @classmethod + def _parse_boolean_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs: + return cls._parse_categorical_data(inputs, column_index) + + @classmethod + def _parse_boolean_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: + return cls._parse_categorical_metadata(inputs_metadata, column_index) + + @classmethod + def _parse_categorical_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs: + values_map: typing.Dict[str, int] = {} + for value in inputs.iloc[:, column_index]: + value = value.strip() + if value not in values_map: + value_hash = hashlib.sha256(value.encode('utf8')) + values_map[value] = int.from_bytes(value_hash.digest()[0:8], byteorder='little') ^ int.from_bytes(value_hash.digest()[8:16], byteorder='little') ^ \ + int.from_bytes(value_hash.digest()[16:24], byteorder='little') ^ int.from_bytes(value_hash.digest()[24:32], byteorder='little') + + outputs = container.DataFrame({inputs.columns[column_index]: [values_map[value.strip()] for value in inputs.iloc[:, column_index]]}, generate_metadata=False) + outputs.metadata = cls._parse_categorical_metadata(inputs.metadata, column_index) + + return outputs + + @classmethod + def _parse_categorical_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: + outputs_metadata = inputs_metadata.select_columns([column_index]) + return outputs_metadata.update_column(0, {'structural_type': int}) + + @classmethod + def _str_to_int(cls, value: str) -> typing.Union[float, int]: + try: + return int(value.strip()) + except ValueError: + try: + # Maybe it is an int represented as a float. Let's try this. This can get rid of non-integer + # part of the value, but the integer was requested through a semantic type, so this is probably OK. + return int(float(value.strip())) + except ValueError: + # No luck, use NaN to represent a missing value. + return float('nan') + + @classmethod + def _parse_integer(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment, + integer_required: bool) -> container.DataFrame: + outputs = container.DataFrame({inputs.columns[column_index]: [cls._str_to_int(value) for value in inputs.iloc[:, column_index]]}, generate_metadata=False) + + if outputs.dtypes.iloc[0].kind == 'f': + structural_type: type = float + elif outputs.dtypes.iloc[0].kind in ['i', 'u']: + structural_type = int + else: + assert False, outputs.dtypes.iloc[0] + + if structural_type is float and integer_required: + raise ValueError("Not all values in a column can be parsed into integers, but only integers were expected.") + + outputs.metadata = inputs.metadata.select_columns([column_index]) + outputs.metadata = outputs.metadata.update_column(0, {'structural_type': structural_type}) + + return outputs + + @classmethod + def _parse_integer_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: + outputs_metadata = inputs_metadata.select_columns([column_index]) + # Without data we assume we can parse everything into integers. This might not be true and + # we might end up parsing into floats if we have to represent missing (or invalid) values. 
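+        # (Editorial example) At runtime the strings ['1', '2', ''] would come back from
+        # _parse_integer as [1, 2, nan] and therefore be typed float, even though int is
+        # reported here.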
+ return outputs_metadata.update_column(0, {'structural_type': int}) + + @classmethod + def _str_to_float(cls, value: str) -> float: + try: + return float(value.strip()) + except ValueError: + return float('nan') + + @classmethod + def _parse_float_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs: + outputs = container.DataFrame({inputs.columns[column_index]: [cls._str_to_float(value) for value in inputs.iloc[:, column_index]]}, generate_metadata=False) + outputs.metadata = cls._parse_float_metadata(inputs.metadata, column_index) + + return outputs + + @classmethod + def _parse_float_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: + outputs_metadata = inputs_metadata.select_columns([column_index]) + return outputs_metadata.update_column(0, {'structural_type': float}) + + @classmethod + def _parse_float_vector_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment) -> Outputs: + # We are pretty strict here because we are assuming this was generated programmatically. + outputs = container.DataFrame( + { + inputs.columns[column_index]: [ + container.ndarray([cls._str_to_float(value) for value in values.split(',')]) + for values in inputs.iloc[:, column_index] + ], + }, + generate_metadata=False, + ) + outputs.metadata = cls._parse_float_metadata(inputs.metadata, column_index) + # We have to automatically generate metadata to set ndarray dimension(s). + outputs.metadata = outputs.metadata.generate(outputs) + + return outputs + + @classmethod + def _parse_float_vector_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: + outputs_metadata = inputs_metadata.select_columns([column_index]) + # We cannot know the dimension of the ndarray without data. 
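+        # (Editorial note) The data path, _parse_float_vector_data, fills the missing ndarray
+        # dimensions in afterwards by calling metadata.generate() on the produced frame; here
+        # only the structural types can be recorded.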
+ outputs_metadata = outputs_metadata.update_column(0, {'structural_type': container.ndarray}) + outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, 0, metadata_base.ALL_ELEMENTS), {'structural_type': numpy.float64}) + return outputs_metadata + + @classmethod + def _parse_time_data(cls, inputs: Inputs, column_index: metadata_base.SimpleSelectorSegment, fuzzy: bool) -> Outputs: + outputs = container.DataFrame({inputs.columns[column_index]: [utils.parse_datetime_to_float(value, fuzzy=fuzzy) for value in inputs.iloc[:, column_index]]}, generate_metadata=False) + outputs.metadata = cls._parse_time_metadata(inputs.metadata, column_index) + + return outputs + + @classmethod + def _parse_time_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_index: metadata_base.SimpleSelectorSegment) -> metadata_base.DataMetadata: + outputs_metadata = inputs_metadata.select_columns([column_index]) + return outputs_metadata.update_column(0, {'structural_type': float}) diff --git a/tods/data_processing/ContructPredictions.py b/tods/data_processing/ContructPredictions.py new file mode 100644 index 0000000..ecc89cf --- /dev/null +++ b/tods/data_processing/ContructPredictions.py @@ -0,0 +1,261 @@ +import os +import typing + +from d3m import container, utils as d3m_utils +from d3m.metadata import base as metadata_base, hyperparams +from d3m.primitive_interfaces import base, transformer +from d3m.contrib.primitives import compute_scores + +import common_primitives + +__all__ = ('ConstructPredictionsPrimitive',) + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + use_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to force primitive to operate on. If metadata reconstruction happens, this is used for reference columns." + " If any specified column is not a primary index or a predicted target, it is skipped.", + ) + exclude_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to not operate on. If metadata reconstruction happens, this is used for reference columns. Applicable only if \"use_columns\" is not provided.", + ) + + +class ConstructPredictionsPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive which takes as input a DataFrame and outputs a DataFrame in Lincoln Labs predictions + format: first column is a d3mIndex column (and other primary index columns, e.g., for object detection + problem), and then predicted targets, each in its column, followed by optional confidence column(s). + + It supports both input columns annotated with semantic types (``https://metadata.datadrivendiscovery.org/types/PrimaryKey``, + ``https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey``, ``https://metadata.datadrivendiscovery.org/types/PredictedTarget``, + ``https://metadata.datadrivendiscovery.org/types/Confidence``), or trying to reconstruct metadata. + This is why the primitive takes also additional input of a reference DataFrame which should + have metadata to help reconstruct missing metadata. If metadata is missing, the primitive + assumes that all ``inputs`` columns are predicted targets, without confidence column(s). 
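+
+    For example (illustrative): for a problem with a single predicted target, the output frame
+    would carry a ``d3mIndex`` column first, the predicted target column next and, when present,
+    trailing columns named ``confidence``.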
+ """ + + metadata = metadata_base.PrimitiveMetadata( + { + 'id': '8d38b340-f83f-4877-baaa-162f8e551736', + 'version': '0.3.0', + 'name': "Construct pipeline predictions output", + 'python_path': 'd3m.primitives.tods.data_processing.construct_predictions', + 'source': { + 'name': common_primitives.__author__, + 'contact': 'mailto:mitar.commonprimitives@tnode.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/construct_predictions.py', + 'https://gitlab.com/datadrivendiscovery/common-primitives.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format( + git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.DATA_CONVERSION, + ], + 'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, + }, + ) + + def produce(self, *, inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: # type: ignore + index_columns = inputs.metadata.get_index_columns() + target_columns = inputs.metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/PredictedTarget',)) + + # Target columns cannot be also index columns. This should not really happen, + # but it could happen with buggy primitives. + target_columns = [target_column for target_column in target_columns if target_column not in index_columns] + + if index_columns and target_columns: + outputs = self._produce_using_semantic_types(inputs, index_columns, target_columns) + else: + outputs = self._produce_reconstruct(inputs, reference, index_columns, target_columns) + + outputs = compute_scores.ComputeScoresPrimitive._encode_columns(outputs) + + # Generally we do not care about column names in DataFrame itself (but use names of columns from metadata), + # but in this case setting column names makes it easier to assure that "to_csv" call produces correct output. + # See: https://gitlab.com/datadrivendiscovery/d3m/issues/147 + column_names = [] + for column_index in range(len(outputs.columns)): + column_names.append(outputs.metadata.query_column(column_index).get('name', outputs.columns[column_index])) + outputs.columns = column_names + + return base.CallResult(outputs) + + def _filter_index_columns(self, inputs_metadata: metadata_base.DataMetadata, index_columns: typing.Sequence[int]) -> typing.Sequence[int]: + if self.hyperparams['use_columns']: + index_columns = [index_column_index for index_column_index in index_columns if index_column_index in self.hyperparams['use_columns']] + if not index_columns: + raise ValueError("No index columns listed in \"use_columns\" hyper-parameter, but index columns are required.") + + else: + index_columns = [index_column_index for index_column_index in index_columns if index_column_index not in self.hyperparams['exclude_columns']] + if not index_columns: + raise ValueError("All index columns listed in \"exclude_columns\" hyper-parameter, but index columns are required.") + + names = [] + for index_column in index_columns: + index_metadata = inputs_metadata.query_column(index_column) + # We do not care about empty strings for names either. 
+ if index_metadata.get('name', None): + names.append(index_metadata['name']) + + if 'd3mIndex' not in names: + raise ValueError("\"d3mIndex\" index column is missing.") + + names_set = set(names) + if len(names) != len(names_set): + duplicate_names = names + for name in names_set: + # Removes just the first occurrence. + duplicate_names.remove(name) + + self.logger.warning("Duplicate names for index columns: %(duplicate_names)s", { + 'duplicate_names': list(set(duplicate_names)), + }) + + return index_columns + + def _get_columns(self, inputs_metadata: metadata_base.DataMetadata, index_columns: typing.Sequence[int], target_columns: typing.Sequence[int]) -> typing.List[int]: + assert index_columns + assert target_columns + + index_columns = self._filter_index_columns(inputs_metadata, index_columns) + + if self.hyperparams['use_columns']: + target_columns = [target_column_index for target_column_index in target_columns if target_column_index in self.hyperparams['use_columns']] + if not target_columns: + raise ValueError("No target columns listed in \"use_columns\" hyper-parameter, but target columns are required.") + + else: + target_columns = [target_column_index for target_column_index in target_columns if target_column_index not in self.hyperparams['exclude_columns']] + if not target_columns: + raise ValueError("All target columns listed in \"exclude_columns\" hyper-parameter, but target columns are required.") + + assert index_columns + assert target_columns + + return list(index_columns) + list(target_columns) + + def _get_confidence_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.List[int]: + confidence_columns = inputs_metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/Confidence',)) + + if self.hyperparams['use_columns']: + confidence_columns = [confidence_column_index for confidence_column_index in confidence_columns if confidence_column_index in self.hyperparams['use_columns']] + else: + confidence_columns = [confidence_column_index for confidence_column_index in confidence_columns if confidence_column_index not in self.hyperparams['exclude_columns']] + + return confidence_columns + + def _produce_using_semantic_types(self, inputs: Inputs, index_columns: typing.Sequence[int], + target_columns: typing.Sequence[int]) -> Outputs: + confidence_columns = self._get_confidence_columns(inputs.metadata) + + output_columns = self._get_columns(inputs.metadata, index_columns, target_columns) + confidence_columns + + # "get_index_columns" makes sure that "d3mIndex" is always listed first. + # And "select_columns" selects columns in order listed, which then + # always puts "d3mIndex" first. + outputs = inputs.select_columns(output_columns) + + if confidence_columns: + outputs.metadata = self._update_confidence_columns(outputs.metadata, confidence_columns) + + return outputs + + def _update_confidence_columns(self, inputs_metadata: metadata_base.DataMetadata, confidence_columns: typing.Sequence[int]) -> metadata_base.DataMetadata: + output_columns_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] + + outputs_metadata = inputs_metadata + + # All confidence columns have to be named "confidence". 
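+        # (Editorial note) _produce_using_semantic_types appends confidence columns after the
+        # index and target columns, so the trailing len(confidence_columns) positions renamed
+        # below are exactly those columns.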
+ for column_index in range(output_columns_length - len(confidence_columns), output_columns_length): + outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, column_index), { + 'name': 'confidence', + }) + + return outputs_metadata + + def _produce_reconstruct(self, inputs: Inputs, reference: Inputs, index_columns: typing.Sequence[int], target_columns: typing.Sequence[int]) -> Outputs: + if not index_columns: + reference_index_columns = reference.metadata.get_index_columns() + + if not reference_index_columns: + raise ValueError("Cannot find an index column in reference data, but an index column is required.") + + filtered_index_columns = self._filter_index_columns(reference.metadata, reference_index_columns) + index = reference.select_columns(filtered_index_columns) + else: + filtered_index_columns = self._filter_index_columns(inputs.metadata, index_columns) + index = inputs.select_columns(filtered_index_columns) + + if not target_columns: + if index_columns: + raise ValueError("No target columns in input data, but index column(s) present.") + + # We assume all inputs are targets. + targets = inputs + + # We make sure at least basic metadata is generated correctly, so we regenerate metadata. + targets.metadata = targets.metadata.generate(targets) + + # We set target column names from the reference and add target semantic types. + targets.metadata = self._update_targets_metadata(targets.metadata, self._get_target_names(reference.metadata)) + + else: + targets = inputs.select_columns(target_columns) + + return index.append_columns(targets) + + def multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.MultiCallResult: # type: ignore + return self._multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, reference=reference) + + def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, reference: Inputs, timeout: float = None, iterations: int = None) -> base.MultiCallResult: # type: ignore + return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs, reference=reference) + + def _get_target_names(self, metadata: metadata_base.DataMetadata) -> typing.List[typing.Union[str, None]]: + target_names = [] + + for column_index in metadata.list_columns_with_semantic_types(('https://metadata.datadrivendiscovery.org/types/TrueTarget',)): + column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) + + target_names.append(column_metadata.get('name', None)) + + return target_names + + def _update_targets_metadata(self, metadata: metadata_base.DataMetadata, target_names: typing.Sequence[typing.Union[str, None]]) -> metadata_base.DataMetadata: + targets_length = metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] + + if targets_length != len(target_names): + raise ValueError("Unexpected number of target columns to apply names to. 
Expected {target_names}, provided {targets_length}.".format( + target_names=len(target_names), + targets_length=targets_length, + )) + + for column_index, target_name in enumerate(target_names): + metadata = metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/Target') + metadata = metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget') + + # We do not have a name for this column, so we skip it and hope for the best. + if target_name is None: + continue + + metadata = metadata.update_column(column_index, { + 'name': target_name, + }) + + return metadata diff --git a/tods/data_processing/ExtractColumnsBySemanticTypes.py b/tods/data_processing/ExtractColumnsBySemanticTypes.py new file mode 100644 index 0000000..283cb18 --- /dev/null +++ b/tods/data_processing/ExtractColumnsBySemanticTypes.py @@ -0,0 +1,141 @@ +import os +import typing + +from d3m import container, exceptions, utils as d3m_utils +from d3m.base import utils as base_utils +from d3m.metadata import base as metadata_base, hyperparams +from d3m.primitive_interfaces import base, transformer + +import common_primitives + +__all__ = ('ExtractColumnsBySemanticTypesPrimitive',) + +Inputs = container.DataFrame +Outputs = container.DataFrame + + +class Hyperparams(hyperparams.Hyperparams): + semantic_types = hyperparams.Set( + elements=hyperparams.Hyperparameter[str](''), + default=('https://metadata.datadrivendiscovery.org/types/Attribute',), + min_size=1, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Semantic types to use to extract columns. By default, a column is extracted if it matches any of them (see \"match_logic\").", + ) + match_logic = hyperparams.Enumeration( + values=['all', 'any', 'equal'], + default='any', + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Should a column have all of the semantic types in \"semantic_types\" to be extracted ('all'), any of them ('any'), or exactly that set ('equal')?", + ) + negate = hyperparams.UniformBool( + default=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Should columns which do not match the semantic types in \"semantic_types\" be extracted instead?", + ) + use_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to force the primitive to operate on. If any specified column does not match any semantic type, it is skipped.", + ) + exclude_columns = hyperparams.Set( + elements=hyperparams.Hyperparameter[int](-1), + default=(), + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", + ) + add_index_columns = hyperparams.UniformBool( + default=False, + semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], + description="Also include primary index columns if input data has them.", + ) + + +class ExtractColumnsBySemanticTypesPrimitive(transformer.TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]): + """ + A primitive which extracts columns from input data based on the semantic types provided. + Columns which match the listed semantic types (by default, any of them) are extracted. 
+ + If you want to extract only attributes, you can use the ``https://metadata.datadrivendiscovery.org/types/Attribute`` + semantic type (which is also the default). + + For real targets (not suggested targets) use ``https://metadata.datadrivendiscovery.org/types/Target``. + For this to work, columns have to be marked as targets by the TA2 in a dataset before the dataset is passed + through a pipeline, or something else has to mark them at some point in the pipeline. + + It uses ``use_columns`` and ``exclude_columns`` to control which columns it considers. + """ + + metadata = metadata_base.PrimitiveMetadata( + { + 'id': '4503a4c6-42f7-45a1-a1d4-ed69699cf5e1', + 'version': '0.4.0', + 'name': "Extracts columns by semantic type", + 'python_path': 'd3m.primitives.tods.data_processing.extract_columns_by_semantic_types', + 'source': { + 'name': common_primitives.__author__, + 'contact': 'mailto:mitar.commonprimitives@tnode.com', + 'uris': [ + 'https://gitlab.com/datadrivendiscovery/common-primitives/blob/master/common_primitives/extract_columns_semantic_types.py', + 'https://gitlab.com/datadrivendiscovery/common-primitives.git', + ], + }, + 'installation': [{ + 'type': metadata_base.PrimitiveInstallationType.PIP, + 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/common-primitives.git@{git_commit}#egg=common_primitives'.format( + git_commit=d3m_utils.current_git_commit(os.path.dirname(__file__)), + ), + }], + 'algorithm_types': [ + metadata_base.PrimitiveAlgorithmType.ARRAY_SLICING, + ], + 'primitive_family': metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, + }, + ) + + def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]: + columns_to_use = self._get_columns(inputs.metadata) + + output_columns = inputs.select_columns(columns_to_use) + + outputs = base_utils.combine_columns(inputs, columns_to_use, [output_columns], return_result='new', add_index_columns=self.hyperparams['add_index_columns']) + + return base.CallResult(outputs) + + def _can_use_column(self, inputs_metadata: metadata_base.DataMetadata, column_index: int) -> bool: + column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) + + semantic_types = column_metadata.get('semantic_types', []) + + if self.hyperparams['match_logic'] == 'all': + match = all(semantic_type in semantic_types for semantic_type in self.hyperparams['semantic_types']) + elif self.hyperparams['match_logic'] == 'any': + match = any(semantic_type in semantic_types for semantic_type in self.hyperparams['semantic_types']) + elif self.hyperparams['match_logic'] == 'equal': + match = set(semantic_types) == set(self.hyperparams['semantic_types']) + else: + raise exceptions.UnexpectedValueError("Unknown value of hyper-parameter \"match_logic\": {value}".format(value=self.hyperparams['match_logic'])) + + if self.hyperparams['negate']: + return not match + else: + return match + + def _get_columns(self, inputs_metadata: metadata_base.DataMetadata) -> typing.Sequence[int]: + def can_use_column(column_index: int) -> bool: + return self._can_use_column(inputs_metadata, column_index) + + columns_to_use, columns_not_to_use = base_utils.get_columns_to_use(inputs_metadata, self.hyperparams['use_columns'], self.hyperparams['exclude_columns'], can_use_column) + + if not columns_to_use: + raise ValueError("Input data has no columns matching semantic types: {semantic_types}".format( + semantic_types=self.hyperparams['semantic_types'], + )) + + if self.hyperparams['use_columns'] and columns_not_to_use: + 
self.logger.warning("Not all specified columns match semantic types. Skipping columns: %(columns)s", { + 'columns': columns_not_to_use, + }) + + return columns_to_use diff --git a/tods/resources/.entry_points.ini b/tods/resources/.entry_points.ini index 7e3075e..81ba6ae 100644 --- a/tods/resources/.entry_points.ini +++ b/tods/resources/.entry_points.ini @@ -7,7 +7,9 @@ tods.data_processing.timestamp_validation = tods.data_processing.TimeStampValida tods.data_processing.duplication_validation = tods.data_processing.DuplicationValidation:DuplicationValidationPrimitive tods.data_processing.continuity_validation = tods.data_processing.ContinuityValidation:ContinuityValidationPrimitive tods.data_processing.impute_missing = tods.data_processing.SKImputer:SKImputerPrimitive - +tods.data_processing.column_parser = tods.data_processing.ColumnParser:ColumnParserPrimitive +tods.data_processing.extract_columns_by_semantic_types = tods.data_processing.ExtractColumnsBySemanticTypes:ExtractColumnsBySemanticTypesPrimitive +tods.data_processing.construct_predictions = tods.data_processing.ConstructPredictions:ConstructPredictionsPrimitive tods.timeseries_processing.transformation.axiswise_scaler = tods.timeseries_processing.SKAxiswiseScaler:SKAxiswiseScalerPrimitive tods.timeseries_processing.transformation.standard_scaler = tods.timeseries_processing.SKStandardScaler:SKStandardScalerPrimitive
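As a quick orientation for the primitives registered above, the following minimal sketch shows how ExtractColumnsBySemanticTypesPrimitive might be driven directly through the standard d3m primitive interface, outside of a full pipeline. The toy DataFrame, its column names, and the semantic types attached to it are illustrative assumptions rather than anything taken from this patch, and the sketch assumes TODS, the d3m core package, and common_primitives are importable:

from d3m import container
from d3m.metadata import base as metadata_base

from tods.data_processing.ExtractColumnsBySemanticTypes import ExtractColumnsBySemanticTypesPrimitive

# Toy input: an index column plus one attribute column, with generated metadata.
dataframe = container.DataFrame({'d3mIndex': [0, 1, 2], 'value': [0.5, 0.7, 0.9]}, generate_metadata=True)

# Attach the semantic types the primitive matches against.
dataframe.metadata = dataframe.metadata.add_semantic_type(
    (metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/PrimaryKey')
dataframe.metadata = dataframe.metadata.add_semantic_type(
    (metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/Attribute')

# With default hyper-parameters the primitive extracts columns carrying the
# "Attribute" semantic type, using 'any' match logic and no index columns.
hyperparams_class = ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams()
primitive = ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults())
attributes = primitive.produce(inputs=dataframe).value

print(list(attributes.columns))  # expected: ['value']

ConstructPredictionsPrimitive follows the same calling convention, except that its produce method additionally takes the reference DataFrame from which index columns and target names are reconstructed, as implemented above.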