From 4069de2744e9975208bf82c9f27fa202f907ebfd Mon Sep 17 00:00:00 2001 From: lhenry15 Date: Sun, 20 Sep 2020 09:09:08 -0500 Subject: [PATCH] merge common Former-commit-id: 082a7acf576336117a46ae603d83db7b17fec251 [formerly 1f74e655a7e305bd05bc0dc68b867919708c95c6] [formerly 69763f4c1225fb8336fef2c15d4dac24170f7a34 [formerly a893d7685f3c1fbaa86d8cd60b2d9f5ed63c03f2]] [formerly cf3b68a678d14748a4a48217423126639a96556b [formerly 99d8f9b77863f4e8fade353b70f8a64d68067161] [formerly 18f539f981e6dc0b98a3d8617cb4cc91df0f5de0 [formerly ef1836f33fa08a381e267a3f74d0243706b752ba]]] [formerly bd8884495d195d50bce63b7cc4f97fa68ee32c16 [formerly 7fbf002dac97b75b46969d1865689bc601a857e1] [formerly f5a45b390bfa0234c3fd9332c6bdfef353d1fce2 [formerly 5ff75e2e466de9edf7153baeb9d408535c5ad899]] [formerly 1b40e48dd8087c382e56ec1261a9acd463c29694 [formerly f323cbe994c9ba6416a30cf44045fdfcc9857223] [formerly 84c2b485e4559b5be5d93d083ba6be6200c7c25f [formerly da5752974dc225f78ee65e4f2cd596a469dc2861]]]] [formerly 79eafae538857d306d8f9b7e085d55fbb06fb711 [formerly c68cb0cba15abbd3ead00f1db3d24ef1b80412e0] [formerly c98bc71cb5012e10c415a09c04247ce8353dd6c6 [formerly f3f012f5fb28eb7e1249965374a027cd30c1890f]] [formerly f202a31ab47b0be99b4e174e8301f6bb21fee479 [formerly 4694d83a52283c6b4858766539c78deadf853239] [formerly a1f0aaca826211b0ac78008ffc64059f5945c355 [formerly eb090d6d3a4283a661797fc7cc3b46206aed7710]]] [formerly 37c4555bd091854b090376a1a0cfc21e29427a57 [formerly 55dfcc63fb10f0d6d3f7fee7ea86f4fa1abebd04] [formerly 302c56c509aef64b5a6e14850df226a609fcb89b [formerly bebdfd98dd6d9cf0b791ebb6b8a32f937db5d7da]] [formerly eed97e12bd488304fc62f7013c55042845f6867e [formerly 67f3d10947b9353fa0f7dfe090313bf2abd5c660] [formerly 395df67343b9b5a5d718212f50d38275e1b8d713 [formerly c43cf776c645f08af798dd15dda97e1712656fc9]]]]] [formerly c4997cb6a0e2764e515861c63907589923034790 [formerly c4703c17ec0973c858f16ceb1d47a032145e8ad0] [formerly 7529580984538d1a89f0fd347cd140d6afdc419b [formerly 9c000b2c794a7441b78c286b5456f2a31aa9cf46]] [formerly b102c6c9b54bb8d089a85a7a0934e467c6709fb7 [formerly fd861756c585c9233fbef576f817650a298cf5fa] [formerly 537f077ec0f60122b0079b951921b9611721b52a [formerly ee573b901f2a8bd8f39123e4a4d92c9be762f348]]] [formerly 105d925a3d46c48d6675979ff42309102d1805b7 [formerly 8cdc22f1d9a0ae6efb49919be1b4bf45510c1c8a] [formerly 39e5bc7d5d131e6b236c91d44687ff22380a909c [formerly 5edf83752100c552a4a85fbd392c37bd775170fd]] [formerly 21b0344ffcafde360621473eba3b54f36ee85283 [formerly 41e32435f7d3cd8f56a26f10d78060b655a22dd9] [formerly cf73b9408fb4dfddaa1c3bbfa5b5522e1402df92 [formerly 04b883bb8ed1f76d814c8031e8b9378881842284]]]] [formerly e732011aec591b27ae2414924cbc825a27c8d7c6 [formerly 163307ea6f1d192605667af038bfb8cf78290c11] [formerly 38c9e8ff5e0d59dba1cc8bc3b748c29968b57cf0 [formerly 2c63f408783e401e58501261b02dc20d7f2bf2bc]] [formerly dc9037bcb64afa854284dcf5e955bb78095e0dbf [formerly 9d15047b5b901d8658e953853571f6cfa3b6b6c3] [formerly 4da590437563a13fa6c5c378360229fe571daed2 [formerly 577283a8278a03af8e8f9256f970afe273cc3103]]] [formerly 238094954c4908833829ce4d5d243ff3eb555a92 [formerly 99f59e110cfb967a7f991f484a5e049ea20625dd] [formerly a7bba7d99c82c6b4ef701650a26b000668ed512f [formerly f7b13e25e8f6232f69f8484f0d895b576833aac4]] [formerly 411b1ea01b4180b0174e294f8676547fcc4ddca5 [formerly baabbad21a1d7c684a3cae1d2daf15c909cd9f65] [formerly 869366cb7afc88bba776d9c58850f96e596b2661 [formerly 733cfb4398abae34d18c9932bd44af8731a4d79e]]]]]] Former-commit-id: 5ac3e41be23cb95a6ebd0a8e859e38db3d01f435 
[formerly fe857595197523f09fb3c43f52c495e7ee3d7b35] [formerly bc7c03db324bbab404f02d15cb889582dd7c8c16 [formerly 9bb2fa5132822405303ca7b72a893f64fb77e468]] [formerly 3b49901893e82b54bb4a37d2c2efab46795275c5 [formerly 5dd1b25ebc5f9140b2338b4ffb8cfa3a024225cb] [formerly b7a93df35814d9f8ecef369693d52a657ccadbaf [formerly 7b3f6e309060d253b6f556b80c6f95b2c2cf7c66]]] [formerly 627cceb26bca068c5b71d2bf78055c876f9283dd [formerly b0f3ed5f08ad772e8e2ba6af49c835fb1fdba2d7] [formerly 1672ff9df145e93f7bbdbce7bbca13c9a171cf09 [formerly 8eb215a6526ff1f76ae86fab11a20f1b009c940f]] [formerly c9a8fc553bab2c913268e316a64fc912ce63ef03 [formerly 7a69e6e65c96d0c479c196d5434d2065809f9c65] [formerly 5418882e2d3428ce8c53bd6bddf0f5bb9e7e46af [formerly 19352e650764f3fa5c9616df5384a2f4a03974e2]]]] [formerly ce5bc94f1c5bf70802675f43cff93a2b56edc0a6 [formerly 65c63ca77b789ce8f30fefb1537a0eea5669e58f] [formerly 00098b391f031b6e70f47106f29414c991cfd146 [formerly 4e4b706c5fa551a6f5c281b0c878cdd5358e02d3]] [formerly 16fbe6ff8e670e54f2e1d3a867da50e70a673783 [formerly ad40273329bd023243d456bb92f6ba38f3c1388a] [formerly f113cd10e66e0be8d8c5749882df4f184ef75cee [formerly 5b3aa4a777d529640119394e9b40f68a66b68342]]] [formerly 7af46bef201a1f8d76115e3bcd6ee8e99a092a21 [formerly 0fe5b7dbb2b1af324d4f02e612273e30d3aad8c8] [formerly 712f4701ed1219f5365c48462b3ae1c73ef41624 [formerly 273adfd4ebc633a6d4e5965a19b057f8301aecd4]] [formerly 226de99943f63e50dd84adcf392e8065074175af [formerly 626f787629745bc4a33fc520549cc2f3943f8b1d] [formerly 869366cb7afc88bba776d9c58850f96e596b2661]]]] Former-commit-id: 912fde5a077cc896a18ece7136fa0672aeba06af [formerly 6c947b866f1ccf9654a07fab3cb7a94a25b9f93d] [formerly 799d5800c207e75f833088f5c72c6bd60ef34eb8 [formerly ef77c736ef1728908ffdfd77cd90b2670460eebb]] [formerly 811bc3d01f8be6e6b07e4810f6711a9bf66fd8cb [formerly 341febd7c4a8eab4a04f938a69521e4a062cf9d8] [formerly 13c141596ad9cc36e8d9bf3799b5933d31eb664a [formerly 933e5935f147aefe49a8a07326dcbc88bfed4855]]] [formerly b77ccf87c19b2667ceea5185bc0b02f35fc7812a [formerly f8fc926b806a37a8f66e91dc302863d604b39d89] [formerly 0632053739b033dfc55e48b26ad88729e27a332e [formerly 73abb0cf6ab914cf64cc730a2ff0fc26772ce265]] [formerly e14b39bc6e2a04ee5627c322660f36de857caf61 [formerly ff77272b0df3927a4f6c4cbe31186ec8c00fedab] [formerly e9bbc7aff879300af3222856838ef985eff9c39b [formerly b6be52bd798b2d0b554ecbe3fa06e03cfe2e51ac]]]] Former-commit-id: 98f7b4cc650e88fc30284e6ca5d3a710d771087a [formerly 3f70f5e2aad1b52d5625bc7e90f6eacbedc9ba7b] [formerly f1df66b510d1702ff9682c14db40fdf1b81239c3 [formerly 36f63495a4acb47dd39e760d85da88c4a878ab64]] [formerly 1b39a6508e1a7f8da703c6199367681b1a59096c [formerly db3e50f7dde15fde230108cb904869913c07df0c] [formerly a36fc0aa0224e3636d7e24f7beb4d1652cdd6a12 [formerly e4e3039ce7f2a2d6668305ce2121383e21b178d0]]] Former-commit-id: 05ff21ba2f975630fe8891468d5629921d90206b [formerly b98dd6488a2bbfd2f0db4668a2e58b10a44b3f27] [formerly c4dbfe74c2fdbe08415f05b8c6e15b3824e34c4f [formerly 7451b535e9b7cda8df41369ab81543f636eab49f]] Former-commit-id: 51c838646579d7758c1cf33044c6494381dacb57 [formerly 3b85eedffd81141d7b337c8d12f4742ec508b7b8] Former-commit-id: 90d4dc1597536642144fc2450d8a594986a07026 --- common-primitives/HISTORY.md | 363 -- common-primitives/HOW_TO_MANAGE.md | 94 - common-primitives/LICENSE.txt | 201 - common-primitives/MANIFEST.in | 2 - common-primitives/README.md | 83 - common-primitives/add.sh | 24 - common-primitives/entry_points.ini | 63 - common-primitives/git-add.sh | 5 - common-primitives/git-check.sh | 21 - 
common-primitives/list_primitives.py | 32 - .../1.yaml.gz | Bin 8605 -> 0 bytes .../1.yaml.gz | Bin 8790 -> 0 bytes .../pipeline_run_extract_structural_types.yml.gz | 1 - .../1.yaml.gz | Bin 8599 -> 0 bytes .../1.yaml.gz | Bin 8575 -> 0 bytes .../2.yaml.gz | Bin 95641 -> 0 bytes .../1.yaml.gz | Bin 7503 -> 0 bytes .../pipeline_run_extract_structural_types.yml.gz | 1 - .../1.yaml.gz | 1 - .../pipeline_run_extract_structural_types.yml.gz | 1 - .../pipeline_run_group_field_compose.yml.gz | 1 - .../1.yaml.gz | 1 - .../pipeline_run_extract_structural_types.yml.gz | 1 - .../1.yaml.gz | 1 - .../pipeline_run_extract_structural_types.yml.gz | 1 - .../pipeline_run_group_field_compose.yml.gz | 1 - .../1.yaml.gz | 1 - .../pipeline_run_extract_structural_types.yml.gz | 1 - .../pipeline_run.yml.gz | Bin 59789 -> 0 bytes .../pipeline_run.yml.gz | Bin 165228 -> 0 bytes .../1.yaml.gz | 1 - .../pipeline_run_extract_structural_types.yml.gz | 1 - .../1.yml | 4729 -------------------- .../pipeline_run_extract_structural_types.yml.gz | 1 - .../pipeline_run_group_field_compose.yml.gz | 1 - .../d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json | 246 - .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 - .../ccad0f9c-130e-4063-a91e-ea65a18cb041.yaml | 110 - .../b7a24816-2518-4073-9c45-b97f2b2fee30.json | 246 - .../4d402450-2562-48cc-93fd-719fb658c43c.json | 246 - .../3afd2bd2-7ba1-4ac1-928f-fad0c39a05e5.json | 522 --- .../4ff2f21d-1bba-4c44-bb96-e05728bcf6ed.json | 342 -- .../387d432a-9893-4558-b190-1c5e9e399dbf.yaml | 123 - .../2b307634-f01e-412e-8d95-7e54afd4731f.json | 300 -- .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 - .../4ec215d1-6484-4502-a6dd-f659943ccb94.json | 1 - .../a8c40699-c48d-4f12-aa18-639c5fb6baae.json | 1 - .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 - .../d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json | 1 - .../4ec215d1-6484-4502-a6dd-f659943ccb94.json | 1 - .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 - .../d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json | 1 - .../4ec215d1-6484-4502-a6dd-f659943ccb94.json | 1 - .../a8c40699-c48d-4f12-aa18-639c5fb6baae.json | 1 - .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 - .../d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json | 1 - .../4ec215d1-6484-4502-a6dd-f659943ccb94.json | 1 - .../pipeline.py | 71 - .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 - .../d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json | 1 - .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 - .../pipeline.py | 83 - .../a8c40699-c48d-4f12-aa18-639c5fb6baae.json | 1 - .../pipeline.py | 100 - .../2b307634-f01e-412e-8d95-7e54afd4731f.json | 1 - .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 - .../11ee9290-992d-4e48-97ed-1a6e4c15f92f.json | 272 -- .../k-fold-timeseries-split.yml | 83 - .../k-fold-timeseries-split-raw.yml | 108 - .../0f636602-6299-411b-9873-4b974cd393ba.json | 247 - .../4ec215d1-6484-4502-a6dd-f659943ccb94.json | 1 - .../a8c40699-c48d-4f12-aa18-639c5fb6baae.json | 1 - .../b523335c-0c47-4d02-a582-f69609cde1e8.json | 1 - common-primitives/run_pipelines.sh | 44 - common-primitives/run_tests.py | 11 - common-primitives/setup.cfg | 28 - common-primitives/setup.py | 65 - common-primitives/sklearn-wrap/.gitignore | 2 - common-primitives/sklearn-wrap/requirements.txt | 31 - common-primitives/sklearn-wrap/setup.py | 106 - .../sklearn-wrap/sklearn_wrap/SKARDRegression.py | 470 -- .../sklearn_wrap/SKAdaBoostClassifier.py | 498 --- .../sklearn_wrap/SKAdaBoostRegressor.py | 437 -- .../sklearn_wrap/SKBaggingClassifier.py | 589 --- .../sklearn_wrap/SKBaggingRegressor.py | 533 --- 
.../sklearn-wrap/sklearn_wrap/SKBernoulliNB.py | 508 --- .../sklearn-wrap/sklearn_wrap/SKBinarizer.py | 330 -- .../sklearn-wrap/sklearn_wrap/SKCountVectorizer.py | 490 -- .../sklearn_wrap/SKDecisionTreeClassifier.py | 621 --- .../sklearn_wrap/SKDecisionTreeRegressor.py | 565 --- .../sklearn-wrap/sklearn_wrap/SKDummyClassifier.py | 503 --- .../sklearn-wrap/sklearn_wrap/SKDummyRegressor.py | 442 -- .../sklearn-wrap/sklearn_wrap/SKElasticNet.py | 466 -- .../sklearn_wrap/SKExtraTreesClassifier.py | 675 --- .../sklearn_wrap/SKExtraTreesRegressor.py | 607 --- .../sklearn-wrap/sklearn_wrap/SKFastICA.py | 439 -- .../sklearn_wrap/SKFeatureAgglomeration.py | 361 -- .../sklearn-wrap/sklearn_wrap/SKGaussianNB.py | 492 -- .../sklearn_wrap/SKGaussianProcessRegressor.py | 463 -- .../sklearn_wrap/SKGaussianRandomProjection.py | 344 -- .../sklearn_wrap/SKGenericUnivariateSelect.py | 443 -- .../sklearn_wrap/SKGradientBoostingClassifier.py | 707 --- .../sklearn_wrap/SKGradientBoostingRegressor.py | 673 --- .../sklearn-wrap/sklearn_wrap/SKImputer.py | 391 -- .../sklearn_wrap/SKKNeighborsClassifier.py | 497 -- .../sklearn_wrap/SKKNeighborsRegressor.py | 475 -- .../sklearn-wrap/sklearn_wrap/SKKernelPCA.py | 536 --- .../sklearn-wrap/sklearn_wrap/SKKernelRidge.py | 491 -- .../sklearn-wrap/sklearn_wrap/SKLars.py | 460 -- .../sklearn-wrap/sklearn_wrap/SKLasso.py | 474 -- .../sklearn-wrap/sklearn_wrap/SKLassoCV.py | 526 --- .../sklearn_wrap/SKLinearDiscriminantAnalysis.py | 535 --- .../sklearn_wrap/SKLinearRegression.py | 431 -- .../sklearn-wrap/sklearn_wrap/SKLinearSVC.py | 478 -- .../sklearn-wrap/sklearn_wrap/SKLinearSVR.py | 452 -- .../sklearn_wrap/SKLogisticRegression.py | 582 --- .../sklearn-wrap/sklearn_wrap/SKMLPClassifier.py | 730 --- .../sklearn-wrap/sklearn_wrap/SKMLPRegressor.py | 669 --- .../sklearn-wrap/sklearn_wrap/SKMaxAbsScaler.py | 339 -- .../sklearn-wrap/sklearn_wrap/SKMinMaxScaler.py | 366 -- .../sklearn_wrap/SKMissingIndicator.py | 373 -- .../sklearn-wrap/sklearn_wrap/SKMultinomialNB.py | 488 -- .../sklearn-wrap/sklearn_wrap/SKNearestCentroid.py | 408 -- .../sklearn-wrap/sklearn_wrap/SKNormalizer.py | 329 -- .../sklearn-wrap/sklearn_wrap/SKNystroem.py | 522 --- .../sklearn-wrap/sklearn_wrap/SKOneHotEncoder.py | 420 -- .../sklearn-wrap/sklearn_wrap/SKOrdinalEncoder.py | 343 -- .../sklearn-wrap/sklearn_wrap/SKPCA.py | 468 -- .../sklearn_wrap/SKPassiveAggressiveClassifier.py | 648 --- .../sklearn_wrap/SKPassiveAggressiveRegressor.py | 583 --- .../sklearn_wrap/SKPolynomialFeatures.py | 346 -- .../SKQuadraticDiscriminantAnalysis.py | 473 -- .../sklearn_wrap/SKQuantileTransformer.py | 364 -- .../sklearn-wrap/sklearn_wrap/SKRBFSampler.py | 349 -- .../sklearn_wrap/SKRandomForestClassifier.py | 682 --- .../sklearn_wrap/SKRandomForestRegressor.py | 609 --- .../sklearn_wrap/SKRandomTreesEmbedding.py | 482 -- .../sklearn-wrap/sklearn_wrap/SKRidge.py | 444 -- .../sklearn-wrap/sklearn_wrap/SKRobustScaler.py | 354 -- .../sklearn-wrap/sklearn_wrap/SKSGDClassifier.py | 661 --- .../sklearn-wrap/sklearn_wrap/SKSGDRegressor.py | 643 --- .../sklearn-wrap/sklearn_wrap/SKSVC.py | 635 --- .../sklearn-wrap/sklearn_wrap/SKSVR.py | 616 --- .../sklearn-wrap/sklearn_wrap/SKSelectFwe.py | 428 -- .../sklearn_wrap/SKSelectPercentile.py | 428 -- .../sklearn_wrap/SKSparseRandomProjection.py | 375 -- .../sklearn-wrap/sklearn_wrap/SKStandardScaler.py | 357 -- .../sklearn-wrap/sklearn_wrap/SKStringImputer.py | 371 -- .../sklearn-wrap/sklearn_wrap/SKTfidfVectorizer.py | 530 --- .../sklearn-wrap/sklearn_wrap/SKTruncatedSVD.py | 369 -- 
.../sklearn_wrap/SKVarianceThreshold.py | 414 -- .../sklearn-wrap/sklearn_wrap/__init__.py | 2 - common-primitives/tests/test_audio_reader.py | 105 - common-primitives/tests/test_cast_to_type.py | 131 - common-primitives/tests/test_column_map.py | 75 - common-primitives/tests/test_column_parser.py | 474 -- .../tests/test_compute_metafeatures.py | 1106 ----- .../tests/test_construct_predictions.py | 233 - common-primitives/tests/test_csv_reader.py | 50 - common-primitives/tests/test_cut_audio.py | 122 - common-primitives/tests/test_dataframe_flatten.py | 132 - .../tests/test_dataframe_image_reader.py | 46 - common-primitives/tests/test_dataframe_to_list.py | 41 - .../tests/test_dataframe_to_ndarray.py | 40 - common-primitives/tests/test_dataframe_utils.py | 27 - common-primitives/tests/test_dataset_map.py | 73 - common-primitives/tests/test_dataset_sample.py | 58 - .../tests/test_dataset_to_dataframe.py | 93 - .../tests/test_datetime_field_compose.py | 67 - .../tests/test_datetime_range_filter.py | 149 - common-primitives/tests/test_denormalize.py | 469 -- .../tests/test_extract_columns_semantic_types.py | 203 - .../tests/test_extract_columns_structural_types.py | 89 - common-primitives/tests/test_fixed_split.py | 148 - .../tests/test_grouping_field_compose.py | 56 - common-primitives/tests/test_horizontal_concat.py | 183 - common-primitives/tests/test_kfold_split.py | 100 - .../tests/test_kfold_timeseries_split.py | 223 - common-primitives/tests/test_lgbm_classifier.py | 571 --- common-primitives/tests/test_list_to_dataframe.py | 185 - common-primitives/tests/test_list_to_ndarray.py | 132 - .../tests/test_ndarray_to_dataframe.py | 99 - common-primitives/tests/test_ndarray_to_list.py | 116 - common-primitives/tests/test_no_split.py | 71 - .../tests/test_normalize_column_references.py | 597 --- common-primitives/tests/test_normalize_graphs.py | 207 - .../tests/test_numeric_range_filter.py | 143 - common-primitives/tests/test_one_hot_maker.py | 516 --- .../tests/test_pandas_onehot_encoder.py | 178 - common-primitives/tests/test_random_forest.py | 701 --- common-primitives/tests/test_ravel.py | 125 - common-primitives/tests/test_redact_columns.py | 173 - common-primitives/tests/test_regex_filter.py | 114 - .../tests/test_remove_duplicate_columns.py | 123 - .../tests/test_rename_duplicate_columns.py | 136 - .../tests/test_replace_semantic_types.py | 97 - common-primitives/tests/test_simple_profiler.py | 446 -- .../tests/test_stack_ndarray_column.py | 77 - common-primitives/tests/test_tabular_extractor.py | 173 - common-primitives/tests/test_term_filter.py | 136 - common-primitives/tests/test_text_reader.py | 30 - common-primitives/tests/test_train_score_split.py | 88 - .../tests/test_unseen_label_decoder.py | 51 - .../tests/test_unseen_label_encoder.py | 46 - common-primitives/tests/test_video_reader.py | 35 - common-primitives/tests/test_xgboost_dart.py | 687 --- common-primitives/tests/test_xgboost_gbtree.py | 733 --- common-primitives/tests/test_xgboost_regressor.py | 617 --- common-primitives/tests/utils.py | 112 - entry_points.ini | 140 +- entry_points_common.ini | 63 + setup.py | 7 +- test.sh | 4 +- .../common_primitives/__init__.py | 0 .../common_primitives/add_semantic_types.py | 0 .../common_primitives/audio_reader.py | 0 .../common_primitives/base.py | 0 .../common_primitives/cast_to_type.py | 0 .../common_primitives/column_map.py | 0 .../common_primitives/column_parser.py | 0 .../common_primitives/compute_metafeatures.py | 0 .../common_primitives/construct_predictions.py | 0 
.../common_primitives/csv_reader.py | 0 .../common_primitives/cut_audio.py | 0 .../common_primitives/dataframe_flatten.py | 0 .../common_primitives/dataframe_image_reader.py | 0 .../common_primitives/dataframe_to_list.py | 0 .../common_primitives/dataframe_to_ndarray.py | 0 .../common_primitives/dataframe_utils.py | 0 .../common_primitives/datamart_augment.py | 0 .../common_primitives/datamart_download.py | 0 .../common_primitives/dataset_map.py | 0 .../common_primitives/dataset_sample.py | 0 .../common_primitives/dataset_to_dataframe.py | 2 +- .../common_primitives/dataset_utils.py | 0 .../common_primitives/datetime_field_compose.py | 0 .../common_primitives/datetime_range_filter.py | 0 .../common_primitives/denormalize.py | 0 .../common_primitives/extract_columns.py | 0 .../extract_columns_semantic_types.py | 0 .../extract_columns_structural_types.py | 0 .../common_primitives/fixed_split.py | 0 .../common_primitives/grouping_field_compose.py | 0 .../common_primitives/holt_smoothing.py | 0 .../holt_winters_exponential_smoothing.py | 0 .../common_primitives/horizontal_concat.py | 0 .../common_primitives/kfold_split.py | 0 .../common_primitives/kfold_split_timeseries.py | 0 .../common_primitives/lgbm_classifier.py | 0 .../common_primitives/list_to_dataframe.py | 0 .../common_primitives/list_to_ndarray.py | 0 .../common_primitives/mean_average_transform.py | 0 .../common_primitives/ndarray_to_dataframe.py | 0 .../common_primitives/ndarray_to_list.py | 0 .../common_primitives/no_split.py | 0 .../normalize_column_references.py | 0 .../common_primitives/normalize_graphs.py | 0 .../common_primitives/numeric_range_filter.py | 0 .../common_primitives/one_hot_maker.py | 0 .../common_primitives/pandas_onehot_encoder.py | 0 .../common_primitives/random_forest.py | 0 .../common_primitives/ravel.py | 0 .../common_primitives/redact_columns.py | 0 .../common_primitives/regex_filter.py | 0 .../common_primitives/remove_columns.py | 0 .../common_primitives/remove_duplicate_columns.py | 0 .../common_primitives/remove_semantic_types.py | 0 .../common_primitives/rename_duplicate_columns.py | 0 .../common_primitives/replace_semantic_types.py | 0 .../simple_exponential_smoothing.py | 0 .../common_primitives/simple_profiler.py | 0 .../common_primitives/slacker/README.md | 0 .../common_primitives/slacker/__init__.py | 0 .../common_primitives/slacker/base.py | 0 .../common_primitives/slacker/estimation.py | 0 .../slacker/feature_extraction.py | 0 .../common_primitives/slacker/feature_selection.py | 0 .../common_primitives/stack_ndarray_column.py | 0 .../common_primitives/tabular_extractor.py | 0 .../common_primitives/term_filter.py | 0 .../common_primitives/text_reader.py | 0 .../common_primitives/train_score_split.py | 0 .../common_primitives/unseen_label_decoder.py | 0 .../common_primitives/unseen_label_encoder.py | 0 .../common_primitives/utils.py | 0 .../common_primitives/video_reader.py | 0 .../common_primitives/xgboost_dart.py | 0 .../common_primitives/xgboost_gbtree.py | 0 .../common_primitives/xgboost_regressor.py | 0 .../data_processing}/CategoricalToBinary.py | 0 .../data_processing}/ColumnFilter.py | 0 .../data_processing}/ContinuityValidation.py | 0 .../data_processing}/DatasetToDataframe.py | 0 .../data_processing}/DuplicationValidation.py | 0 .../data_processing}/TimeIntervalTransform.py | 0 .../data_processing}/TimeStampValidation.py | 0 .../data_processing}/__init__.py | 0 .../detection_algorithm}/AutoRegODetect.py | 0 .../detection_algorithm}/DeepLog.py | 0 
.../detection_algorithm}/KDiscordODetect.py | 0 .../detection_algorithm}/LSTMODetect.py | 0 .../detection_algorithm}/MatrixProfile.py | 0 .../detection_algorithm}/PCAODetect.py | 0 .../detection_algorithm}/PyodABOD.py | 0 .../detection_algorithm}/PyodAE.py | 0 .../detection_algorithm}/PyodCBLOF.py | 0 .../detection_algorithm}/PyodCOF.py | 0 .../detection_algorithm}/PyodHBOS.py | 0 .../detection_algorithm}/PyodIsolationForest.py | 0 .../detection_algorithm}/PyodKNN.py | 0 .../detection_algorithm}/PyodLODA.py | 0 .../detection_algorithm}/PyodLOF.py | 0 .../detection_algorithm}/PyodMoGaal.py | 0 .../detection_algorithm}/PyodOCSVM.py | 0 .../detection_algorithm}/PyodSOD.py | 0 .../detection_algorithm}/PyodSoGaal.py | 0 .../detection_algorithm}/PyodVAE.py | 0 .../detection_algorithm}/Telemanom.py | 0 .../detection_algorithm}/UODBasePrimitive.py | 0 .../detection_algorithm}/core/AutoRegOD.py | 0 .../detection_algorithm}/core/CollectiveBase.py | 0 .../core/CollectiveCommonTest.py | 0 .../detection_algorithm}/core/KDiscord.py | 0 .../detection_algorithm}/core/LSTMOD.py | 0 .../detection_algorithm}/core/MultiAutoRegOD.py | 0 .../detection_algorithm}/core/PCA.py | 0 .../detection_algorithm}/core/UODCommonTest.py | 0 .../core/algorithm_implementation.py | 0 .../core/test_CollectiveBase.py | 0 .../detection_algorithm}/core/utility.py | 0 .../detection_algorithm}/core/utils/channel.py | 0 .../detection_algorithm}/core/utils/errors.py | 0 .../detection_algorithm}/core/utils/modeling.py | 0 .../detection_algorithm}/core/utils/utils.py | 0 .../feature_analysis}/AutoCorrelation.py | 0 .../feature_analysis}/BKFilter.py | 0 .../feature_analysis}/DiscreteCosineTransform.py | 0 .../feature_analysis}/FastFourierTransform.py | 0 .../feature_analysis}/HPFilter.py | 0 .../NonNegativeMatrixFactorization.py | 0 .../feature_analysis}/SKTruncatedSVD.py | 0 .../feature_analysis}/SpectralResidualTransform.py | 0 .../feature_analysis}/StatisticalAbsEnergy.py | 0 .../feature_analysis}/StatisticalAbsSum.py | 0 .../feature_analysis}/StatisticalGmean.py | 0 .../feature_analysis}/StatisticalHmean.py | 0 .../feature_analysis}/StatisticalKurtosis.py | 0 .../feature_analysis}/StatisticalMaximum.py | 0 .../feature_analysis}/StatisticalMean.py | 0 .../feature_analysis}/StatisticalMeanAbs.py | 0 .../StatisticalMeanAbsTemporalDerivative.py | 0 .../StatisticalMeanTemporalDerivative.py | 0 .../feature_analysis}/StatisticalMedian.py | 0 .../StatisticalMedianAbsoluteDeviation.py | 0 .../feature_analysis}/StatisticalMinimum.py | 0 .../feature_analysis}/StatisticalSkew.py | 0 .../feature_analysis}/StatisticalStd.py | 0 .../feature_analysis}/StatisticalVar.py | 0 .../feature_analysis}/StatisticalVariation.py | 0 .../feature_analysis}/StatisticalVecSum.py | 0 .../StatisticalWillisonAmplitude.py | 0 .../feature_analysis}/StatisticalZeroCrossing.py | 0 .../feature_analysis}/TRMF.py | 0 .../feature_analysis}/WaveletTransform.py | 0 .../feature_analysis}/__init__.py | 0 .../reinforcement}/RuleBasedFilter.py | 0 .../timeseries_processing}/.HoltSmoothing.py.swo | Bin .../timeseries_processing}/HoltSmoothing.py | 0 .../HoltWintersExponentialSmoothing.py | 0 .../MovingAverageTransform.py | 0 .../timeseries_processing}/SKAxiswiseScaler.py | 0 .../timeseries_processing}/SKPowerTransformer.py | 0 .../SKQuantileTransformer.py | 0 .../timeseries_processing}/SKStandardScaler.py | 0 .../SimpleExponentialSmoothing.py | 0 .../TimeSeriesSeasonalityTrendDecomposition.py | 0 .../timeseries_processing}/__init__.py | 0 377 files changed, 141 insertions(+), 55727 
deletions(-) delete mode 100644 common-primitives/HISTORY.md delete mode 100644 common-primitives/HOW_TO_MANAGE.md delete mode 100644 common-primitives/LICENSE.txt delete mode 100644 common-primitives/MANIFEST.in delete mode 100644 common-primitives/README.md delete mode 100755 common-primitives/add.sh delete mode 100644 common-primitives/entry_points.ini delete mode 100755 common-primitives/git-add.sh delete mode 100755 common-primitives/git-check.sh delete mode 100755 common-primitives/list_primitives.py delete mode 100644 common-primitives/pipeline_runs/classification.light_gbm.DataFrameCommon/1.yaml.gz delete mode 100644 common-primitives/pipeline_runs/classification.random_forest.DataFrameCommon/1.yaml.gz delete mode 120000 common-primitives/pipeline_runs/classification.random_forest.DataFrameCommon/pipeline_run_extract_structural_types.yml.gz delete mode 100644 common-primitives/pipeline_runs/classification.xgboost_dart.DataFrameCommon/1.yaml.gz delete mode 100644 common-primitives/pipeline_runs/classification.xgboost_gbtree.DataFrameCommon/1.yaml.gz delete mode 100644 common-primitives/pipeline_runs/data_augmentation.datamart_augmentation.Common/2.yaml.gz delete mode 100644 common-primitives/pipeline_runs/data_preprocessing.dataset_sample.Common/1.yaml.gz delete mode 120000 common-primitives/pipeline_runs/data_preprocessing.one_hot_encoder.PandasCommon/pipeline_run_extract_structural_types.yml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.column_parser.DataFrameCommon/1.yaml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.column_parser.DataFrameCommon/pipeline_run_extract_structural_types.yml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.column_parser.DataFrameCommon/pipeline_run_group_field_compose.yml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.construct_predictions.DataFrameCommon/1.yaml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.construct_predictions.DataFrameCommon/pipeline_run_extract_structural_types.yml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.dataset_to_dataframe.Common/1.yaml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.dataset_to_dataframe.Common/pipeline_run_extract_structural_types.yml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.dataset_to_dataframe.Common/pipeline_run_group_field_compose.yml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.extract_columns_by_semantic_types.DataFrameCommon/1.yaml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.extract_columns_by_semantic_types.DataFrameCommon/pipeline_run_extract_structural_types.yml.gz delete mode 100644 common-primitives/pipeline_runs/data_transformation.extract_columns_by_structural_types.Common/pipeline_run.yml.gz delete mode 100644 common-primitives/pipeline_runs/data_transformation.grouping_field_compose.Common/pipeline_run.yml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.horizontal_concat.DataFrameConcat/1.yaml.gz delete mode 120000 common-primitives/pipeline_runs/data_transformation.remove_columns.Common/pipeline_run_extract_structural_types.yml.gz delete mode 100644 common-primitives/pipeline_runs/regression.xgboost_gbtree.DataFrameCommon/1.yml delete mode 120000 common-primitives/pipeline_runs/schema_discovery.profiler.Common/pipeline_run_extract_structural_types.yml.gz delete mode 120000 
common-primitives/pipeline_runs/schema_discovery.profiler.Common/pipeline_run_group_field_compose.yml.gz delete mode 100644 common-primitives/pipelines/classification.light_gbm.DataFrameCommon/d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json delete mode 120000 common-primitives/pipelines/classification.random_forest.DataFrameCommon/b523335c-0c47-4d02-a582-f69609cde1e8.json delete mode 100644 common-primitives/pipelines/classification.random_forest.DataFrameCommon/ccad0f9c-130e-4063-a91e-ea65a18cb041.yaml delete mode 100644 common-primitives/pipelines/classification.xgboost_dart.DataFrameCommon/b7a24816-2518-4073-9c45-b97f2b2fee30.json delete mode 100644 common-primitives/pipelines/classification.xgboost_gbtree.DataFrameCommon/4d402450-2562-48cc-93fd-719fb658c43c.json delete mode 100644 common-primitives/pipelines/data_augmentation.datamart_augmentation.Common/3afd2bd2-7ba1-4ac1-928f-fad0c39a05e5.json delete mode 100644 common-primitives/pipelines/data_augmentation.datamart_augmentation.Common/4ff2f21d-1bba-4c44-bb96-e05728bcf6ed.json delete mode 100644 common-primitives/pipelines/data_preprocessing.dataset_sample.Common/387d432a-9893-4558-b190-1c5e9e399dbf.yaml delete mode 100644 common-primitives/pipelines/data_preprocessing.one_hot_encoder.MakerCommon/2b307634-f01e-412e-8d95-7e54afd4731f.json delete mode 120000 common-primitives/pipelines/data_preprocessing.one_hot_encoder.PandasCommon/b523335c-0c47-4d02-a582-f69609cde1e8.json delete mode 120000 common-primitives/pipelines/data_transformation.column_parser.DataFrameCommon/4ec215d1-6484-4502-a6dd-f659943ccb94.json delete mode 120000 common-primitives/pipelines/data_transformation.column_parser.DataFrameCommon/a8c40699-c48d-4f12-aa18-639c5fb6baae.json delete mode 120000 common-primitives/pipelines/data_transformation.column_parser.DataFrameCommon/b523335c-0c47-4d02-a582-f69609cde1e8.json delete mode 120000 common-primitives/pipelines/data_transformation.column_parser.DataFrameCommon/d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json delete mode 120000 common-primitives/pipelines/data_transformation.construct_predictions.DataFrameCommon/4ec215d1-6484-4502-a6dd-f659943ccb94.json delete mode 120000 common-primitives/pipelines/data_transformation.construct_predictions.DataFrameCommon/b523335c-0c47-4d02-a582-f69609cde1e8.json delete mode 120000 common-primitives/pipelines/data_transformation.construct_predictions.DataFrameCommon/d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json delete mode 120000 common-primitives/pipelines/data_transformation.dataset_to_dataframe.Common/4ec215d1-6484-4502-a6dd-f659943ccb94.json delete mode 120000 common-primitives/pipelines/data_transformation.dataset_to_dataframe.Common/a8c40699-c48d-4f12-aa18-639c5fb6baae.json delete mode 120000 common-primitives/pipelines/data_transformation.dataset_to_dataframe.Common/b523335c-0c47-4d02-a582-f69609cde1e8.json delete mode 120000 common-primitives/pipelines/data_transformation.dataset_to_dataframe.Common/d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json delete mode 100644 common-primitives/pipelines/data_transformation.extract_columns.Common/4ec215d1-6484-4502-a6dd-f659943ccb94.json delete mode 100644 common-primitives/pipelines/data_transformation.extract_columns.Common/pipeline.py delete mode 120000 common-primitives/pipelines/data_transformation.extract_columns_by_semantic_types.DataFrameCommon/b523335c-0c47-4d02-a582-f69609cde1e8.json delete mode 120000 common-primitives/pipelines/data_transformation.extract_columns_by_semantic_types.DataFrameCommon/d2473bbc-7839-4deb-9ba4-4ff4bc9b0bde.json delete mode 
100644 common-primitives/pipelines/data_transformation.extract_columns_by_structural_types.Common/b523335c-0c47-4d02-a582-f69609cde1e8.json delete mode 100644 common-primitives/pipelines/data_transformation.extract_columns_by_structural_types.Common/pipeline.py delete mode 100644 common-primitives/pipelines/data_transformation.grouping_field_compose.Common/a8c40699-c48d-4f12-aa18-639c5fb6baae.json delete mode 100644 common-primitives/pipelines/data_transformation.grouping_field_compose.Common/pipeline.py delete mode 120000 common-primitives/pipelines/data_transformation.horizontal_concat.DataFrameConcat/2b307634-f01e-412e-8d95-7e54afd4731f.json delete mode 120000 common-primitives/pipelines/data_transformation.remove_columns.Common/b523335c-0c47-4d02-a582-f69609cde1e8.json delete mode 100644 common-primitives/pipelines/data_transformation.rename_duplicate_name.DataFrameCommon/11ee9290-992d-4e48-97ed-1a6e4c15f92f.json delete mode 100644 common-primitives/pipelines/evaluation.kfold_timeseries_split.Common/k-fold-timeseries-split.yml delete mode 100644 common-primitives/pipelines/operator.dataset_map.DataFrameCommon/k-fold-timeseries-split-raw.yml delete mode 100644 common-primitives/pipelines/regression.xgboost_gbtree.DataFrameCommon/0f636602-6299-411b-9873-4b974cd393ba.json delete mode 120000 common-primitives/pipelines/schema_discovery.profiler.Common/4ec215d1-6484-4502-a6dd-f659943ccb94.json delete mode 120000 common-primitives/pipelines/schema_discovery.profiler.Common/a8c40699-c48d-4f12-aa18-639c5fb6baae.json delete mode 120000 common-primitives/pipelines/schema_discovery.profiler.Common/b523335c-0c47-4d02-a582-f69609cde1e8.json delete mode 100755 common-primitives/run_pipelines.sh delete mode 100755 common-primitives/run_tests.py delete mode 100644 common-primitives/setup.cfg delete mode 100644 common-primitives/setup.py delete mode 100644 common-primitives/sklearn-wrap/.gitignore delete mode 100644 common-primitives/sklearn-wrap/requirements.txt delete mode 100644 common-primitives/sklearn-wrap/setup.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKARDRegression.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKAdaBoostClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKAdaBoostRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKBaggingClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKBaggingRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKBernoulliNB.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKBinarizer.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKCountVectorizer.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKDecisionTreeClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKDecisionTreeRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKDummyClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKDummyRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKElasticNet.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKExtraTreesClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKExtraTreesRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKFastICA.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKFeatureAgglomeration.py delete mode 100644 
common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianNB.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianProcessRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianRandomProjection.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKGenericUnivariateSelect.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKGradientBoostingClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKGradientBoostingRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKImputer.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKKNeighborsClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKKNeighborsRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKKernelPCA.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKKernelRidge.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKLars.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKLasso.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKLassoCV.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKLinearDiscriminantAnalysis.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKLinearRegression.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKLinearSVC.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKLinearSVR.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKLogisticRegression.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKMLPClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKMLPRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKMaxAbsScaler.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKMinMaxScaler.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKMissingIndicator.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKMultinomialNB.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKNearestCentroid.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKNormalizer.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKNystroem.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKOneHotEncoder.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKOrdinalEncoder.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKPCA.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKPassiveAggressiveClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKPassiveAggressiveRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKPolynomialFeatures.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKQuadraticDiscriminantAnalysis.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKQuantileTransformer.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKRBFSampler.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKRandomForestClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKRandomForestRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKRandomTreesEmbedding.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKRidge.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKRobustScaler.py delete mode 100644 
common-primitives/sklearn-wrap/sklearn_wrap/SKSGDClassifier.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKSGDRegressor.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKSVC.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKSVR.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKSelectFwe.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKSelectPercentile.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKSparseRandomProjection.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKStandardScaler.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKStringImputer.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKTfidfVectorizer.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKTruncatedSVD.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/SKVarianceThreshold.py delete mode 100644 common-primitives/sklearn-wrap/sklearn_wrap/__init__.py delete mode 100644 common-primitives/tests/test_audio_reader.py delete mode 100644 common-primitives/tests/test_cast_to_type.py delete mode 100644 common-primitives/tests/test_column_map.py delete mode 100644 common-primitives/tests/test_column_parser.py delete mode 100644 common-primitives/tests/test_compute_metafeatures.py delete mode 100644 common-primitives/tests/test_construct_predictions.py delete mode 100644 common-primitives/tests/test_csv_reader.py delete mode 100644 common-primitives/tests/test_cut_audio.py delete mode 100644 common-primitives/tests/test_dataframe_flatten.py delete mode 100644 common-primitives/tests/test_dataframe_image_reader.py delete mode 100644 common-primitives/tests/test_dataframe_to_list.py delete mode 100644 common-primitives/tests/test_dataframe_to_ndarray.py delete mode 100644 common-primitives/tests/test_dataframe_utils.py delete mode 100644 common-primitives/tests/test_dataset_map.py delete mode 100644 common-primitives/tests/test_dataset_sample.py delete mode 100644 common-primitives/tests/test_dataset_to_dataframe.py delete mode 100644 common-primitives/tests/test_datetime_field_compose.py delete mode 100644 common-primitives/tests/test_datetime_range_filter.py delete mode 100644 common-primitives/tests/test_denormalize.py delete mode 100644 common-primitives/tests/test_extract_columns_semantic_types.py delete mode 100644 common-primitives/tests/test_extract_columns_structural_types.py delete mode 100644 common-primitives/tests/test_fixed_split.py delete mode 100644 common-primitives/tests/test_grouping_field_compose.py delete mode 100644 common-primitives/tests/test_horizontal_concat.py delete mode 100644 common-primitives/tests/test_kfold_split.py delete mode 100644 common-primitives/tests/test_kfold_timeseries_split.py delete mode 100644 common-primitives/tests/test_lgbm_classifier.py delete mode 100644 common-primitives/tests/test_list_to_dataframe.py delete mode 100644 common-primitives/tests/test_list_to_ndarray.py delete mode 100644 common-primitives/tests/test_ndarray_to_dataframe.py delete mode 100644 common-primitives/tests/test_ndarray_to_list.py delete mode 100644 common-primitives/tests/test_no_split.py delete mode 100644 common-primitives/tests/test_normalize_column_references.py delete mode 100644 common-primitives/tests/test_normalize_graphs.py delete mode 100644 common-primitives/tests/test_numeric_range_filter.py delete mode 100644 common-primitives/tests/test_one_hot_maker.py delete mode 100644 
common-primitives/tests/test_pandas_onehot_encoder.py delete mode 100644 common-primitives/tests/test_random_forest.py delete mode 100644 common-primitives/tests/test_ravel.py delete mode 100644 common-primitives/tests/test_redact_columns.py delete mode 100644 common-primitives/tests/test_regex_filter.py delete mode 100644 common-primitives/tests/test_remove_duplicate_columns.py delete mode 100644 common-primitives/tests/test_rename_duplicate_columns.py delete mode 100644 common-primitives/tests/test_replace_semantic_types.py delete mode 100644 common-primitives/tests/test_simple_profiler.py delete mode 100644 common-primitives/tests/test_stack_ndarray_column.py delete mode 100644 common-primitives/tests/test_tabular_extractor.py delete mode 100644 common-primitives/tests/test_term_filter.py delete mode 100644 common-primitives/tests/test_text_reader.py delete mode 100644 common-primitives/tests/test_train_score_split.py delete mode 100644 common-primitives/tests/test_unseen_label_decoder.py delete mode 100644 common-primitives/tests/test_unseen_label_encoder.py delete mode 100644 common-primitives/tests/test_video_reader.py delete mode 100644 common-primitives/tests/test_xgboost_dart.py delete mode 100644 common-primitives/tests/test_xgboost_gbtree.py delete mode 100644 common-primitives/tests/test_xgboost_regressor.py delete mode 100644 common-primitives/tests/utils.py create mode 100644 entry_points_common.ini rename {common-primitives => tods}/common_primitives/__init__.py (100%) rename {common-primitives => tods}/common_primitives/add_semantic_types.py (100%) rename {common-primitives => tods}/common_primitives/audio_reader.py (100%) rename {common-primitives => tods}/common_primitives/base.py (100%) rename {common-primitives => tods}/common_primitives/cast_to_type.py (100%) rename {common-primitives => tods}/common_primitives/column_map.py (100%) rename {common-primitives => tods}/common_primitives/column_parser.py (100%) rename {common-primitives => tods}/common_primitives/compute_metafeatures.py (100%) rename {common-primitives => tods}/common_primitives/construct_predictions.py (100%) rename {common-primitives => tods}/common_primitives/csv_reader.py (100%) rename {common-primitives => tods}/common_primitives/cut_audio.py (100%) rename {common-primitives => tods}/common_primitives/dataframe_flatten.py (100%) rename {common-primitives => tods}/common_primitives/dataframe_image_reader.py (100%) rename {common-primitives => tods}/common_primitives/dataframe_to_list.py (100%) rename {common-primitives => tods}/common_primitives/dataframe_to_ndarray.py (100%) rename {common-primitives => tods}/common_primitives/dataframe_utils.py (100%) rename {common-primitives => tods}/common_primitives/datamart_augment.py (100%) rename {common-primitives => tods}/common_primitives/datamart_download.py (100%) rename {common-primitives => tods}/common_primitives/dataset_map.py (100%) rename {common-primitives => tods}/common_primitives/dataset_sample.py (100%) rename {common-primitives => tods}/common_primitives/dataset_to_dataframe.py (99%) rename {common-primitives => tods}/common_primitives/dataset_utils.py (100%) rename {common-primitives => tods}/common_primitives/datetime_field_compose.py (100%) rename {common-primitives => tods}/common_primitives/datetime_range_filter.py (100%) rename {common-primitives => tods}/common_primitives/denormalize.py (100%) rename {common-primitives => tods}/common_primitives/extract_columns.py (100%) rename {common-primitives => 
tods}/common_primitives/extract_columns_semantic_types.py (100%) rename {common-primitives => tods}/common_primitives/extract_columns_structural_types.py (100%) rename {common-primitives => tods}/common_primitives/fixed_split.py (100%) rename {common-primitives => tods}/common_primitives/grouping_field_compose.py (100%) rename {common-primitives => tods}/common_primitives/holt_smoothing.py (100%) rename {common-primitives => tods}/common_primitives/holt_winters_exponential_smoothing.py (100%) rename {common-primitives => tods}/common_primitives/horizontal_concat.py (100%) rename {common-primitives => tods}/common_primitives/kfold_split.py (100%) rename {common-primitives => tods}/common_primitives/kfold_split_timeseries.py (100%) rename {common-primitives => tods}/common_primitives/lgbm_classifier.py (100%) rename {common-primitives => tods}/common_primitives/list_to_dataframe.py (100%) rename {common-primitives => tods}/common_primitives/list_to_ndarray.py (100%) rename {common-primitives => tods}/common_primitives/mean_average_transform.py (100%) rename {common-primitives => tods}/common_primitives/ndarray_to_dataframe.py (100%) rename {common-primitives => tods}/common_primitives/ndarray_to_list.py (100%) rename {common-primitives => tods}/common_primitives/no_split.py (100%) rename {common-primitives => tods}/common_primitives/normalize_column_references.py (100%) rename {common-primitives => tods}/common_primitives/normalize_graphs.py (100%) rename {common-primitives => tods}/common_primitives/numeric_range_filter.py (100%) rename {common-primitives => tods}/common_primitives/one_hot_maker.py (100%) rename {common-primitives => tods}/common_primitives/pandas_onehot_encoder.py (100%) rename {common-primitives => tods}/common_primitives/random_forest.py (100%) rename {common-primitives => tods}/common_primitives/ravel.py (100%) rename {common-primitives => tods}/common_primitives/redact_columns.py (100%) rename {common-primitives => tods}/common_primitives/regex_filter.py (100%) rename {common-primitives => tods}/common_primitives/remove_columns.py (100%) rename {common-primitives => tods}/common_primitives/remove_duplicate_columns.py (100%) rename {common-primitives => tods}/common_primitives/remove_semantic_types.py (100%) rename {common-primitives => tods}/common_primitives/rename_duplicate_columns.py (100%) rename {common-primitives => tods}/common_primitives/replace_semantic_types.py (100%) rename {common-primitives => tods}/common_primitives/simple_exponential_smoothing.py (100%) rename {common-primitives => tods}/common_primitives/simple_profiler.py (100%) rename {common-primitives => tods}/common_primitives/slacker/README.md (100%) rename {common-primitives => tods}/common_primitives/slacker/__init__.py (100%) rename {common-primitives => tods}/common_primitives/slacker/base.py (100%) rename {common-primitives => tods}/common_primitives/slacker/estimation.py (100%) rename {common-primitives => tods}/common_primitives/slacker/feature_extraction.py (100%) rename {common-primitives => tods}/common_primitives/slacker/feature_selection.py (100%) rename {common-primitives => tods}/common_primitives/stack_ndarray_column.py (100%) rename {common-primitives => tods}/common_primitives/tabular_extractor.py (100%) rename {common-primitives => tods}/common_primitives/term_filter.py (100%) rename {common-primitives => tods}/common_primitives/text_reader.py (100%) rename {common-primitives => tods}/common_primitives/train_score_split.py (100%) rename {common-primitives => 
tods}/common_primitives/unseen_label_decoder.py (100%) rename {common-primitives => tods}/common_primitives/unseen_label_encoder.py (100%) rename {common-primitives => tods}/common_primitives/utils.py (100%) rename {common-primitives => tods}/common_primitives/video_reader.py (100%) rename {common-primitives => tods}/common_primitives/xgboost_dart.py (100%) rename {common-primitives => tods}/common_primitives/xgboost_gbtree.py (100%) rename {common-primitives => tods}/common_primitives/xgboost_regressor.py (100%) rename {data_processing => tods/data_processing}/CategoricalToBinary.py (100%) rename {data_processing => tods/data_processing}/ColumnFilter.py (100%) rename {data_processing => tods/data_processing}/ContinuityValidation.py (100%) rename {data_processing => tods/data_processing}/DatasetToDataframe.py (100%) rename {data_processing => tods/data_processing}/DuplicationValidation.py (100%) rename {data_processing => tods/data_processing}/TimeIntervalTransform.py (100%) rename {data_processing => tods/data_processing}/TimeStampValidation.py (100%) rename {data_processing => tods/data_processing}/__init__.py (100%) rename {detection_algorithm => tods/detection_algorithm}/AutoRegODetect.py (100%) rename {detection_algorithm => tods/detection_algorithm}/DeepLog.py (100%) rename {detection_algorithm => tods/detection_algorithm}/KDiscordODetect.py (100%) rename {detection_algorithm => tods/detection_algorithm}/LSTMODetect.py (100%) rename {detection_algorithm => tods/detection_algorithm}/MatrixProfile.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PCAODetect.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodABOD.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodAE.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodCBLOF.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodCOF.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodHBOS.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodIsolationForest.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodKNN.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodLODA.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodLOF.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodMoGaal.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodOCSVM.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodSOD.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodSoGaal.py (100%) rename {detection_algorithm => tods/detection_algorithm}/PyodVAE.py (100%) rename {detection_algorithm => tods/detection_algorithm}/Telemanom.py (100%) rename {detection_algorithm => tods/detection_algorithm}/UODBasePrimitive.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/AutoRegOD.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/CollectiveBase.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/CollectiveCommonTest.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/KDiscord.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/LSTMOD.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/MultiAutoRegOD.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/PCA.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/UODCommonTest.py (100%) rename {detection_algorithm => 
tods/detection_algorithm}/core/algorithm_implementation.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/test_CollectiveBase.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/utility.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/utils/channel.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/utils/errors.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/utils/modeling.py (100%) rename {detection_algorithm => tods/detection_algorithm}/core/utils/utils.py (100%) rename {feature_analysis => tods/feature_analysis}/AutoCorrelation.py (100%) rename {feature_analysis => tods/feature_analysis}/BKFilter.py (100%) rename {feature_analysis => tods/feature_analysis}/DiscreteCosineTransform.py (100%) rename {feature_analysis => tods/feature_analysis}/FastFourierTransform.py (100%) rename {feature_analysis => tods/feature_analysis}/HPFilter.py (100%) rename {feature_analysis => tods/feature_analysis}/NonNegativeMatrixFactorization.py (100%) rename {feature_analysis => tods/feature_analysis}/SKTruncatedSVD.py (100%) rename {feature_analysis => tods/feature_analysis}/SpectralResidualTransform.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalAbsEnergy.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalAbsSum.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalGmean.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalHmean.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalKurtosis.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalMaximum.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalMean.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalMeanAbs.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalMeanAbsTemporalDerivative.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalMeanTemporalDerivative.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalMedian.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalMedianAbsoluteDeviation.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalMinimum.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalSkew.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalStd.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalVar.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalVariation.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalVecSum.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalWillisonAmplitude.py (100%) rename {feature_analysis => tods/feature_analysis}/StatisticalZeroCrossing.py (100%) rename {feature_analysis => tods/feature_analysis}/TRMF.py (100%) rename {feature_analysis => tods/feature_analysis}/WaveletTransform.py (100%) rename {feature_analysis => tods/feature_analysis}/__init__.py (100%) rename {reinforcement => tods/reinforcement}/RuleBasedFilter.py (100%) rename {timeseries_processing => tods/timeseries_processing}/.HoltSmoothing.py.swo (100%) rename {timeseries_processing => tods/timeseries_processing}/HoltSmoothing.py (100%) rename {timeseries_processing => tods/timeseries_processing}/HoltWintersExponentialSmoothing.py (100%) rename {timeseries_processing => tods/timeseries_processing}/MovingAverageTransform.py (100%) rename {timeseries_processing 
=> tods/timeseries_processing}/SKAxiswiseScaler.py (100%) rename {timeseries_processing => tods/timeseries_processing}/SKPowerTransformer.py (100%) rename {timeseries_processing => tods/timeseries_processing}/SKQuantileTransformer.py (100%) rename {timeseries_processing => tods/timeseries_processing}/SKStandardScaler.py (100%) rename {timeseries_processing => tods/timeseries_processing}/SimpleExponentialSmoothing.py (100%) rename {timeseries_processing => tods/timeseries_processing}/TimeSeriesSeasonalityTrendDecomposition.py (100%) rename {timeseries_processing => tods/timeseries_processing}/__init__.py (100%) diff --git a/common-primitives/HISTORY.md b/common-primitives/HISTORY.md deleted file mode 100644 index 5daa8a3..0000000 --- a/common-primitives/HISTORY.md +++ /dev/null @@ -1,363 +0,0 @@ -## v0.8.0 - -* Removed multi-targets support in `classification.light_gbm.Common` and fixed - categorical attributes handling. - [!118](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/118) -* Unified date parsing across primitives. - Added `raise_error` hyper-parameter to `data_preprocessing.datetime_range_filter.Common`. - This bumped the version of the primitive. - [!117](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/117) -* `evaluation.kfold_time_series_split.Common` now parses the datetime column - before sorting. `fuzzy_time_parsing` hyper-parameter was added to the primitive. - This bumped the version of the primitive. - [!110](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/110) -* Added option `equal` to hyper-parameter `match_logic` of primitive - `data_transformation.extract_columns_by_semantic_types.Common` to support set equality - when determining columns to extract. This bumped the version of the primitive. - [!116](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/116) -* Fixed `data_preprocessing.one_hot_encoder.MakerCommon` to work with the - latest core package. -* `data_cleaning.tabular_extractor.Common` has been fixed to work with the - latest version of sklearn. - [!113](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/113) -* ISI side of `data_augmentation.datamart_augmentation.Common` and - `data_augmentation.datamart_download.Common` has been updated. - [!108](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/108) -* Improved how pipelines and pipeline runs for all primitives are managed. - Many more pipelines and pipeline runs were added. -* `evaluation.kfold_timeseries_split.Common` has been renamed to `evaluation.kfold_time_series_split.Common`. -* Fixed `data_preprocessing.dataset_sample.Common` on empty input. - [!95](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/95) -* `data_preprocessing.datetime_range_filter.Common` does not assume local timezone - when parsing dates. - [#115](https://gitlab.com/datadrivendiscovery/common-primitives/issues/115) -* Added `fuzzy_time_parsing` hyper-parameter to `data_transformation.column_parser.Common`. - This bumped the version of the primitive. -* Fixed `data_transformation.column_parser.Common` to work correctly with `python-dateutil==2.8.1`. - [#119](https://gitlab.com/datadrivendiscovery/common-primitives/issues/119). -* Refactored `data_preprocessing.one_hot_encoder.MakerCommon` to address some issues. 
- [#66](https://gitlab.com/datadrivendiscovery/common-primitives/issues/66) - [#75](https://gitlab.com/datadrivendiscovery/common-primitives/issues/75) - [!96](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/96) -* Added support for handling of numeric columns to `data_preprocessing.regex_filter.Common` and `data_preprocessing.term_filter.Common`. - [!101](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/101) - [!104](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/104) -* Fixed exception in `produce` method in `data_transformation.datetime_field_compose.Common` caused by using incorrect type for dataframe indexer. - [!102](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/102) -* Added primitives: - * `data_transformation.grouping_field_compose.Common` - -## v0.7.0 - -* Renamed primitives: - * `data_transformation.add_semantic_types.DataFrameCommon` to `data_transformation.add_semantic_types.Common` - * `data_transformation.remove_semantic_types.DataFrameCommon` to `data_transformation.remove_semantic_types.Common` - * `data_transformation.replace_semantic_types.DataFrameCommon` to `data_transformation.replace_semantic_types.Common` - * `operator.column_map.DataFrameCommon` to `operator.column_map.Common` - * `regression.xgboost_gbtree.DataFrameCommon` to `regression.xgboost_gbtree.Common` - * `classification.light_gbm.DataFrameCommon` to `classification.light_gbm.Common` - * `classification.xgboost_gbtree.DataFrameCommon` to `classification.xgboost_gbtree.Common` - * `classification.xgboost_dart.DataFrameCommon` to `classification.xgboost_dart.Common` - * `classification.random_forest.DataFrameCommon` to `classification.random_forest.Common` - * `data_transformation.extract_columns.DataFrameCommon` to `data_transformation.extract_columns.Common` - * `data_transformation.extract_columns_by_semantic_types.DataFrameCommon` to `data_transformation.extract_columns_by_semantic_types.Common` - * `data_transformation.extract_columns_by_structural_types.DataFrameCommon` to `data_transformation.extract_columns_by_structural_types.Common` - * `data_transformation.cut_audio.DataFrameCommon` to `data_transformation.cut_audio.Common` - * `data_transformation.column_parser.DataFrameCommon` to `data_transformation.column_parser.Common` - * `data_transformation.remove_columns.DataFrameCommon` to `data_transformation.remove_columns.Common` - * `data_transformation.remove_duplicate_columns.DataFrameCommon` to `data_transformation.remove_duplicate_columns.Common` - * `data_transformation.horizontal_concat.DataFrameConcat` to `data_transformation.horizontal_concat.DataFrameCommon` - * `data_transformation.construct_predictions.DataFrameCommon` to `data_transformation.construct_predictions.Common` - * `data_transformation.datetime_field_compose.DataFrameCommon` to `data_transformation.datetime_field_compose.Common` - * `data_preprocessing.label_encoder.DataFrameCommon` to `data_preprocessing.label_encoder.Common` - * `data_preprocessing.label_decoder.DataFrameCommon` to `data_preprocessing.label_decoder.Common` - * `data_preprocessing.image_reader.DataFrameCommon` to `data_preprocessing.image_reader.Common` - * `data_preprocessing.text_reader.DataFrameCommon` to `data_preprocessing.text_reader.Common` - * `data_preprocessing.video_reader.DataFrameCommon` to `data_preprocessing.video_reader.Common` - * `data_preprocessing.csv_reader.DataFrameCommon` to `data_preprocessing.csv_reader.Common` - * 
`data_preprocessing.audio_reader.DataFrameCommon` to `data_preprocessing.audio_reader.Common`
- * `data_preprocessing.regex_filter.DataFrameCommon` to `data_preprocessing.regex_filter.Common`
- * `data_preprocessing.term_filter.DataFrameCommon` to `data_preprocessing.term_filter.Common`
- * `data_preprocessing.numeric_range_filter.DataFrameCommon` to `data_preprocessing.numeric_range_filter.Common`
- * `data_preprocessing.datetime_range_filter.DataFrameCommon` to `data_preprocessing.datetime_range_filter.Common`
-
-## v0.6.0
-
-* Added `match_logic`, `negate`, and `add_index_columns` hyper-parameters to the `data_transformation.extract_columns_by_structural_types.DataFrameCommon` and `data_transformation.extract_columns_by_semantic_types.DataFrameCommon` primitives.
-* `feature_extraction.sparse_pca.Common` has been removed and is now available as part of realML.
- [!89](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/89)
-* Added new primitives:
- * `data_preprocessing.datetime_range_filter.DataFrameCommon`
- * `data_transformation.datetime_field_compose.DataFrameCommon`
- * `data_preprocessing.flatten.DataFrameCommon`
- * `data_augmentation.datamart_augmentation.Common`
- * `data_augmentation.datamart_download.Common`
- * `data_preprocessing.dataset_sample.Common`
-
- [#53](https://gitlab.com/datadrivendiscovery/common-primitives/issues/53)
- [!86](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/86)
- [!87](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/87)
- [!85](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/85)
- [!63](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/63)
- [!92](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/92)
- [!93](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/93)
- [!81](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/81)
-
-* Fixed the `fit` method to return the correct value for `operator.column_map.DataFrameCommon`, `operator.dataset_map.DataFrameCommon`, and `schema_discovery.profiler.Common`.
-* Some unmaintained primitives have been disabled. If you are using them, consider adopting them.
- * `classification.bayesian_logistic_regression.Common`
- * `regression.convolutional_neural_net.TorchCommon`
- * `operator.diagonal_mvn.Common`
- * `regression.feed_forward_neural_net.TorchCommon`
- * `data_preprocessing.image_reader.Common`
- * `clustering.k_means.Common`
- * `regression.linear_regression.Common`
- * `regression.loss.TorchCommon`
- * `feature_extraction.pca.Common`
-* `data_transformation.update_semantic_types.DatasetCommon` has been removed. Use `data_transformation.add_semantic_types.DataFrameCommon`, `data_transformation.remove_semantic_types.DataFrameCommon`, or `data_transformation.replace_semantic_types.DataFrameCommon` together with the `operator.dataset_map.DataFrameCommon` primitive to obtain the previous functionality.
- [#83](https://gitlab.com/datadrivendiscovery/common-primitives/issues/83)
-* `data_transformation.remove_columns.DatasetCommon` has been removed. Use `data_transformation.remove_columns.DataFrameCommon` together with the `operator.dataset_map.DataFrameCommon` primitive to obtain the previous functionality.
- [#83](https://gitlab.com/datadrivendiscovery/common-primitives/issues/83)
-* Some primitives which operate on a Dataset have been converted to operate on a DataFrame and renamed.
- Use them together with the `operator.dataset_map.DataFrameCommon` primitive to obtain the previous functionality.
- * `data_preprocessing.regex_filter.DatasetCommon` to `data_preprocessing.regex_filter.DataFrameCommon`
- * `data_preprocessing.term_filter.DatasetCommon` to `data_preprocessing.term_filter.DataFrameCommon`
- * `data_preprocessing.numeric_range_filter.DatasetCommon` to `data_preprocessing.numeric_range_filter.DataFrameCommon`
-
- [#83](https://gitlab.com/datadrivendiscovery/common-primitives/issues/83)
- [!84](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/84)
-
-* `schema_discovery.profiler.Common` has been improved:
- * More options added to `detect_semantic_types`.
- * Added new `remove_unknown_type` hyper-parameter.
-
-## v0.5.0
-
-* `evaluation.compute_scores.Common` primitive has been moved to the core package and renamed to `evaluation.compute_scores.Core`.
-* `metafeature_extraction.compute_metafeatures.Common` has been renamed to `metalearning.metafeature_extractor.Common`.
-* `evaluation.compute_scores.Common` now has an `add_normalized_scores` hyper-parameter to control whether a column with normalized scores is also added to the output; it is now added by default.
-* `data_preprocessing.text_reader.DataFrameCommon` primitive has been fixed.
-* `data_transformation.rename_duplicate_name.DataFrameCommon` primitive was fixed to handle all types of column names.
- [#73](https://gitlab.com/datadrivendiscovery/common-primitives/issues/73)
- [!65](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/65)
-* Added new primitives:
- * `data_cleaning.tabular_extractor.Common`
- * `data_preprocessing.one_hot_encoder.PandasCommon`
- * `schema_discovery.profiler.Common`
- * `data_transformation.ravel.DataFrameRowCommon`
- * `operator.column_map.DataFrameCommon`
- * `operator.dataset_map.DataFrameCommon`
- * `data_transformation.normalize_column_references.Common`
- * `data_transformation.normalize_graphs.Common`
- * `feature_extraction.sparse_pca.Common`
- * `evaluation.kfold_timeseries_split.Common`
-
- [#57](https://gitlab.com/datadrivendiscovery/common-primitives/issues/57)
- [!42](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/42)
- [!44](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/44)
- [!47](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/47)
- [!71](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/71)
- [!73](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/73)
- [!77](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/77)
- [!66](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/66)
- [!67](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/67)
-
-* Added hyper-parameter `error_on_no_columns` to `classification.random_forest.DataFrameCommon`.
-* Common primitives have been updated to the latest changes in the `d3m` core package.
-* Many utility functions from `utils.py` have been moved to the `d3m` core package.
-
-## v0.4.0
-
-* Renamed `data_preprocessing.one_hot_encoder.Common` to `data_preprocessing.one_hot_encoder.MakerCommon` and reimplemented it.
- [!54](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/54)
-* Added new primitives:
- * `classification.xgboost_gbtree.DataFrameCommon`
- * `classification.xgboost_dart.DataFrameCommon`
- * `regression.xgboost_gbtree.DataFrameCommon`
- * `classification.light_gbm.DataFrameCommon`
- * `data_transformation.rename_duplicate_name.DataFrameCommon`
-
- [!45](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/45)
- [!46](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/46)
- [!49](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/49)
-
-* Made sure `utils.select_columns` also works when given a tuple of columns, not just a list.
- [!58](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/58)
-* `classification.random_forest.DataFrameCommon` updated so that produced columns have names matching column names during fitting. Moreover, `produce_feature_importances` returns a `DataFrame` with each column being one feature and having one row with importances.
- [!59](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/59)
-* `regression.feed_forward_neural_net.TorchCommon` updated to support selection of columns using semantic types.
- [!57](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/57)
-
-## v0.3.0
-
-* Made `evaluation.redact_columns.Common` primitive more general so that it can redact any columns based on their semantic type and not just targets.
-* Renamed primitives:
- * `datasets.Denormalize` to `data_transformation.denormalize.Common`
- * `datasets.DatasetToDataFrame` to `data_transformation.dataset_to_dataframe.Common`
- * `evaluation.ComputeScores` to `evaluation.compute_scores.Common`
- * `evaluation.RedactTargets` to `evaluation.redact_columns.Common`
- * `evaluation.KFoldDatasetSplit` to `evaluation.kfold_dataset_split.Common`
- * `evaluation.TrainScoreDatasetSplit` to `evaluation.train_score_dataset_split.Common`
- * `evaluation.NoSplitDatasetSplit` to `evaluation.no_split_dataset_split.Common`
- * `evaluation.FixedSplitDatasetSplit` to `evaluation.fixed_split_dataset_split.Commmon`
- * `classifier.RandomForest` to `classification.random_forest.DataFrameCommon`
- * `metadata.ComputeMetafeatures` to `metafeature_extraction.compute_metafeatures.Common`
- * `audio.CutAudio` to `data_transformation.cut_audio.DataFrameCommon`
- * `data.ListToNDArray` to `data_transformation.list_to_ndarray.Common`
- * `data.StackNDArrayColumn` to `data_transformation.stack_ndarray_column.Common`
- * `data.AddSemanticTypes` to `data_transformation.add_semantic_types.DataFrameCommon`
- * `data.RemoveSemanticTypes` to `data_transformation.remove_semantic_types.DataFrameCommon`
- * `data.ConstructPredictions` to `data_transformation.construct_predictions.DataFrameCommon`
- * `data.ColumnParser` to `data_transformation.column_parser.DataFrameCommon`
- * `data.CastToType` to `data_transformation.cast_to_type.Common`
- * `data.ExtractColumns` to `data_transformation.extract_columns.DataFrameCommon`
- * `data.ExtractColumnsBySemanticTypes` to `data_transformation.extract_columns_by_semantic_types.DataFrameCommon`
- * `data.ExtractColumnsByStructuralTypes` to `data_transformation.extract_columns_by_structural_types.DataFrameCommon`
- * `data.RemoveColumns` to `data_transformation.remove_columns.DataFrameCommon`
- * `data.RemoveDuplicateColumns` to `data_transformation.remove_duplicate_columns.DataFrameCommon`
- * `data.HorizontalConcat` to
`data_transformation.horizontal_concat.DataFrameConcat`
- * `data.DataFrameToNDArray` to `data_transformation.dataframe_to_ndarray.Common`
- * `data.NDArrayToDataFrame` to `data_transformation.ndarray_to_dataframe.Common`
- * `data.DataFrameToList` to `data_transformation.dataframe_to_list.Common`
- * `data.ListToDataFrame` to `data_transformation.list_to_dataframe.Common`
- * `data.NDArrayToList` to `data_transformation.ndarray_to_list.Common`
- * `data.ReplaceSemanticTypes` to `data_transformation.replace_semantic_types.DataFrameCommon`
- * `data.UnseenLabelEncoder` to `data_preprocessing.label_encoder.DataFrameCommon`
- * `data.UnseenLabelDecoder` to `data_preprocessing.label_decoder.DataFrameCommon`
- * `data.ImageReader` to `data_preprocessing.image_reader.DataFrameCommon`
- * `data.TextReader` to `data_preprocessing.text_reader.DataFrameCommon`
- * `data.VideoReader` to `data_preprocessing.video_reader.DataFrameCommon`
- * `data.CSVReader` to `data_preprocessing.csv_reader.DataFrameCommon`
- * `data.AudioReader` to `data_preprocessing.audio_reader.DataFrameCommon`
- * `datasets.UpdateSemanticTypes` to `data_transformation.update_semantic_types.DatasetCommon`
- * `datasets.RemoveColumns` to `data_transformation.remove_columns.DatasetCommon`
- * `datasets.RegexFilter` to `data_preprocessing.regex_filter.DatasetCommon`
- * `datasets.TermFilter` to `data_preprocessing.term_filter.DatasetCommon`
- * `datasets.NumericRangeFilter` to `data_preprocessing.numeric_range_filter.DatasetCommon`
- * `common_primitives.BayesianLogisticRegression` to `classification.bayesian_logistic_regression.Common`
- * `common_primitives.ConvolutionalNeuralNet` to `regression.convolutional_neural_net.TorchCommon`
- * `common_primitives.DiagonalMVN` to `operator.diagonal_mvn.Common`
- * `common_primitives.FeedForwardNeuralNet` to `regression.feed_forward_neural_net.TorchCommon`
- * `common_primitives.ImageReader` to `data_preprocessing.image_reader.Common`
- * `common_primitives.KMeans` to `clustering.kmeans.Common`
- * `common_primitives.LinearRegression` to `regression.linear_regression.Common`
- * `common_primitives.Loss` to `regression.loss.TorchCommon`
- * `common_primitives.PCA` to `feature_extraction.pca.Common`
- * `common_primitives.OneHotMaker` to `data_preprocessing.one_hot_encoder.Common`
-* Fixed pickling issue of `classifier.RandomForest`.
- [#47](https://gitlab.com/datadrivendiscovery/common-primitives/issues/47)
- [!48](https://gitlab.com/datadrivendiscovery/common-primitives/merge_requests/48)
-* `data.ColumnParser` primitive now has an additional hyper-parameter `replace_index_columns`, which controls whether index columns are still replaced when returned parsed columns are otherwise appended.
-* Made `data.RemoveDuplicateColumns` fit and remember duplicate columns during training.
- [#45](https://gitlab.com/datadrivendiscovery/common-primitives/issues/45)
-* Added `match_logic` hyper-parameter to the `data.ReplaceSemanticTypes` primitive, which allows one to control how multiple specified semantic types match.
-* Added new primitives:
- * `metadata.ComputeMetafeatures`
- * `datasets.RegexFilter`
- * `datasets.TermFilter`
- * `datasets.NumericRangeFilter`
- * `evaluation.NoSplitDatasetSplit`
- * `evaluation.FixedSplitDatasetSplit`
-* Column parser fixed to parse columns with the `http://schema.org/DateTime` semantic type.
-* Simplified the logic (and made it more predictable) of the `combine_columns` utility function when `return_result` is `new` and `add_index_columns` is set to true (a sketch of the resulting rule appears at the end of this patch).
- Now, if the output already contains any index column, input index columns are not added; and if there are no index columns, all input index columns are added at the beginning.
-* Fixed `_can_use_inputs_column` in `classifier.RandomForest`. Added a check of the structural type, so only columns with numerical structural types are processed.
-* Correctly set column names in `evaluation.ComputeScores` primitive's output.
-* Cast indices and columns to match predicted columns' dtypes.
- [#33](https://gitlab.com/datadrivendiscovery/common-primitives/issues/33)
-* `datasets.DatasetToDataFrame` primitive does not try to generate metadata automatically because this is not really needed (metadata can just be copied from the dataset). This speeds up the primitive.
- [#34](https://gitlab.com/datadrivendiscovery/common-primitives/issues/34)
-* Made it uniform that, whenever we generate lists of all column names, we first try to get the name from the metadata and fall back to the one in the DataFrame, instead of using a column index in the latter case.
-* Made splitting primitives, `classifier.RandomForest`, and `data.UnseenLabelEncoder` picklable even when unfitted.
-* Fixed entry point for `audio.CutAudio` primitive.
-
-## v0.2.0
-
-* Made the primitives listed below operate on semantic types and support different ways to return results.
-* Added or updated many primitives:
- * `data.ExtractColumns`
- * `data.ExtractColumnsBySemanticTypes`
- * `data.ExtractColumnsByStructuralTypes`
- * `data.RemoveColumns`
- * `data.RemoveDuplicateColumns`
- * `data.HorizontalConcat`
- * `data.CastToType`
- * `data.ColumnParser`
- * `data.ConstructPredictions`
- * `data.DataFrameToNDArray`
- * `data.NDArrayToDataFrame`
- * `data.DataFrameToList`
- * `data.ListToDataFrame`
- * `data.NDArrayToList`
- * `data.ListToNDArray`
- * `data.StackNDArrayColumn`
- * `data.AddSemanticTypes`
- * `data.RemoveSemanticTypes`
- * `data.ReplaceSemanticTypes`
- * `data.UnseenLabelEncoder`
- * `data.UnseenLabelDecoder`
- * `data.ImageReader`
- * `data.TextReader`
- * `data.VideoReader`
- * `data.CSVReader`
- * `data.AudioReader`
- * `datasets.Denormalize`
- * `datasets.DatasetToDataFrame`
- * `datasets.UpdateSemanticTypes`
- * `datasets.RemoveColumns`
- * `evaluation.RedactTargets`
- * `evaluation.ComputeScores`
- * `evaluation.KFoldDatasetSplit`
- * `evaluation.TrainScoreDatasetSplit`
- * `audio.CutAudio`
- * `classifier.RandomForest`
-* Started listing enabled primitives in the [`entry_points.ini`](./entry_points.ini) file.
-* Created `devel` branch which contains primitives coded against the future release of the `d3m` core package (its `devel` branch). The `master` branch of this repository is made against the latest stable release of the `d3m` core package.
-* Dropped support for Python 2.7; Python 3.6 is now required.
-* Renamed repository and package to `common-primitives` and `common_primitives`, respectively.
-* Repository migrated to gitlab.com and made public.
-
-## v0.1.1
-
-* Made common primitives work on Python 2.7.
-
-## v0.1.0
-
-* Initial set of common primitives.
diff --git a/common-primitives/HOW_TO_MANAGE.md b/common-primitives/HOW_TO_MANAGE.md
deleted file mode 100644
index 9e0d3db..0000000
--- a/common-primitives/HOW_TO_MANAGE.md
+++ /dev/null
@@ -1,94 +0,0 @@
-# How to publish primitive annotations
-
-As contributors add or update their primitives, they might want to publish primitive annotations for the added primitives.
-When doing this, it is important to also republish all other primitive annotations already published from this package. This is because only one version of the package can be installed at a time and all primitive annotations have to point to the same package in their `installation` metadata.
-
-Steps to publish primitive annotations:
-* Operate in a virtual env with the following installed:
- * The target core package.
- * [Test primitives](https://gitlab.com/datadrivendiscovery/tests-data/tree/master/primitives) at the same version as the primitives currently published in the `primitives` repository. Remember to install them in `-e` editable mode.
-* Update `HISTORY.md` for the `vNEXT` release with information about primitives added or updated. If there was no package release since they were last updated, do not duplicate entries but just update any existing entries for those primitives instead, so that once released it is clear what has changed in the release as a whole.
-* Make sure tests for primitives being published (primitives added, updated, and primitives previously published which should now be republished) pass.
-* Update `entry_points.ini` and add new primitives. Leave active only those entries for primitives being (re)published and comment out all others.
- * If this is the first time primitives are published after a release of a new `d3m` core package, leave active only those which were updated to work with the new `d3m` core package. Leave it to others to update, verify, and publish the other common primitives.
-* In a clone of the `primitives` repository, prepare a branch off the up-to-date `master` branch to add/update primitive annotations. If existing annotations for common primitives are already there, it is best to first remove them, to make sure annotations for removed primitives do not stay around. We will re-add all primitives in the next step.
-* Run `add.sh` in the root of this package, which will add primitive annotations to `primitives`. See the instructions in the script for more information.
-* Verify the changes in `primitives`, then add and commit the files to git.
-* Publish a branch in `primitives` and make a merge request.
-
-# How to release a new version
-
-A new version is always released from the `master` branch against a stable release of the `d3m` core package. A new version should be released when there are major changes to the package (many new primitives added, larger breaking changes). Sync up with other developers of the repo to suggest a release, or do a release.
-
-* On `master` branch:
- * Make sure the `HISTORY.md` file is updated with all changes since the last release.
- * Change the version in `common_primitives/__init__.py` to the to-be-released version, without the `v` prefix.
- * Change `vNEXT` in `HISTORY.md` to the to-be-released version, with the `v` prefix.
- * Commit with message `Bumping version for release.`
- * `git push`
- * Wait for CI to run tests successfully.
- * Tag with the version prefixed with `v`, e.g., for version `0.2.0`: `git tag v0.2.0`
- * `git push` & `git push --tags`
- * Change the version in `common_primitives/__init__.py` back to the `devel` string.
- * Add a new empty `vNEXT` version on top of `HISTORY.md`.
- * Commit with message `Version bump for development.`
- * `git push`
-* On `devel` branch:
- * Merge `master` into `devel` branch: `git merge master`
- * Update the branch according to the section below.
- * `git push`
-
-# How to update `master` branch after a release of a new `d3m` core package
-
-Hopefully, the `devel` branch already contains code which works against the released `d3m` core package. So merge the `devel` branch into the `master` branch and update files according to the following section.
-
-# Keeping `master` and `devel` branches in sync
-
-Because the `master` and `devel` branches mostly contain the same code, just made against different versions of the `d3m` core package, it is common to merge the branches into each other as needed to keep them in sync. When doing so, the following files are specific to each branch:
-
-* `.gitlab-ci.yml` has a `DEPENDENCY_REF` environment variable which has to point to `master` on the `master` branch of this repository, and to `devel` on the `devel` branch of this repository.
-
-# How to add an example pipeline
-
-Every common primitive (except those used in non-standard pipelines, like splitting primitives) should have at least one example pipeline and an associated pipeline run.
-
-Add example pipelines to the sub-directory of the `pipelines` directory in the repository corresponding to the primitive's suffix. If a pipeline uses multiple common primitives, add it for only one primitive and create symbolic links for the other primitives.
-
-Create a `fit-score` pipeline run as [described in the primitives index repository](https://gitlab.com/datadrivendiscovery/primitives#adding-a-primitive). Compress it with `gzip` and store it under the `pipeline_runs` directory in the repository. Similarly, add it only for one primitive and create symbolic links for the others, if the pipeline run corresponds to a pipeline with multiple common primitives.
-
-Use the `git-add.sh` script to ensure all files larger than 100 KB are added as git LFS files to the repository.
diff --git a/common-primitives/LICENSE.txt b/common-primitives/LICENSE.txt
deleted file mode 100644
index 261eeb9..0000000
--- a/common-primitives/LICENSE.txt
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
diff --git a/common-primitives/MANIFEST.in b/common-primitives/MANIFEST.in
deleted file mode 100644
index 3e677d0..0000000
--- a/common-primitives/MANIFEST.in
+++ /dev/null
@@ -1,2 +0,0 @@
-include README.md
-include LICENSE.txt
diff --git a/common-primitives/README.md b/common-primitives/README.md
deleted file mode 100644
index fe2fbcf..0000000
--- a/common-primitives/README.md
+++ /dev/null
@@ -1,83 +0,0 @@
-# Common D3M primitives
-
-A common set of primitives for the D3M project, maintained together. It contains example primitives, various glue primitives, and other primitives contributed by performers.
-
-## Installation
-
-This package works on Python 3.6+ and pip 19+.
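A minimal sketch of checking both prerequisites programmatically, assuming `pip` is importable in the same environment:

```python
import sys

import pip  # used here only to read the installed pip version

# The package targets Python 3.6+ and pip 19+.
assert sys.version_info >= (3, 6), "Python 3.6 or newer is required"
assert int(pip.__version__.split(".")[0]) >= 19, "pip 19 or newer is required"
```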
-
-This package has additional dependencies which are specified in primitives' metadata, but if you are manually installing the package, you have to first run, for Ubuntu:
-
-```
-$ apt-get install build-essential libopenblas-dev libcap-dev ffmpeg
-$ pip3 install python-prctl
-```
-
-To install common primitives from inside a cloned repository, run:
-
-```
-$ pip3 install -e .
-```
-
-When cloning the repository, clone it recursively to also get its git submodules:
-
-```
-$ git clone --recursive https://gitlab.com/datadrivendiscovery/common-primitives.git
-```
-
-## Changelog
-
-See [HISTORY.md](./HISTORY.md) for a summary of changes to this package.
-
-## Repository structure
-
-The `master` branch contains the latest code of common primitives made against the latest stable release of the [`d3m` core package](https://gitlab.com/datadrivendiscovery/d3m) (its `master` branch). The `devel` branch contains the latest code of common primitives made against the future release of the `d3m` core package (its `devel` branch).
-
-Releases are [tagged](https://gitlab.com/datadrivendiscovery/d3m/tags) but they are not done regularly. Each primitive has its own versions as well, which are not related to package versions. Generally, it is best to just use the latest code available in the `master` or `devel` branch (depending on which version of the core package you are using).
-
-## Testing locally
-
-For each commit to this repository, tests run automatically in the [GitLab CI](https://gitlab.com/datadrivendiscovery/common-primitives/pipelines).
-
-If you don't want to wait for the GitLab CI test results and would rather run the tests locally, you can install and use the [GitLab runner](https://docs.gitlab.com/runner/install/) on your system.
-
-With the local GitLab runner, you can run the tests defined in the [.gitlab-ci.yml](.gitlab-ci.yml) file of this repository, such as:
-
-```
-$ gitlab-runner exec docker style_check
-$ gitlab-runner exec docker type_check
-```
-
-You can also just run the tests available under `/tests` by running:
-
-```
-$ python3 run_tests.py
-```
-
-## Contribute
-
-Feel free to contribute more primitives to this repository. The idea is that we build a common set of primitives which can serve both as examples and as a way to share the maintenance of some primitives, especially glue primitives.
-
-All primitives are written in Python 3 and are type checked using [mypy](http://www.mypy-lang.org/), so typing annotations are required.
-
-## About Data Driven Discovery Program
-
-The DARPA Data Driven Discovery (D3M) program is researching ways to get machines to build machine learning pipelines automatically. It is split into three layers: TA1 (primitives), TA2 (systems which combine primitives automatically into pipelines and execute them), and TA3 (end-user interfaces).
diff --git a/common-primitives/add.sh b/common-primitives/add.sh
deleted file mode 100755
index 7059b16..0000000
--- a/common-primitives/add.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash -e
-
-# Assumption is that this repository is cloned into the "common-primitives" directory
-# which is a sibling of the "d3m-primitives" directory with D3M public primitives.
- -D3M_VERSION="$(python3 -c 'import d3m; print(d3m.__version__)')" - -for PRIMITIVE_SUFFIX in $(./list_primitives.py --suffix); do - echo "$PRIMITIVE_SUFFIX" - python3 -m d3m index describe -i 4 "d3m.primitives.$PRIMITIVE_SUFFIX" > primitive.json - pushd ../d3m-primitives > /dev/null - ./add.py ../common-primitives/primitive.json - popd > /dev/null - if [[ -e "pipelines/$PRIMITIVE_SUFFIX" ]]; then - PRIMITIVE_PATH="$(echo ../d3m-primitives/v$D3M_VERSION/common-primitives/d3m.primitives.$PRIMITIVE_SUFFIX/*)" - mkdir -p "$PRIMITIVE_PATH/pipelines" - find pipelines/$PRIMITIVE_SUFFIX/ \( -name '*.json' -or -name '*.yaml' -or -name '*.yml' -or -name '*.json.gz' -or -name '*.yaml.gz' -or -name '*.yml.gz' \) -exec cp '{}' "$PRIMITIVE_PATH/pipelines" ';' - fi - if [[ -e "pipeline_runs/$PRIMITIVE_SUFFIX" ]]; then - PRIMITIVE_PATH="$(echo ../d3m-primitives/v$D3M_VERSION/common-primitives/d3m.primitives.$PRIMITIVE_SUFFIX/*)" - mkdir -p "$PRIMITIVE_PATH/pipeline_runs" - find pipeline_runs/$PRIMITIVE_SUFFIX/ \( -name '*.yml.gz' -or -name '*.yaml.gz' \) -exec cp '{}' "$PRIMITIVE_PATH/pipeline_runs" ';' - fi -done diff --git a/common-primitives/entry_points.ini b/common-primitives/entry_points.ini deleted file mode 100644 index 5dac201..0000000 --- a/common-primitives/entry_points.ini +++ /dev/null @@ -1,63 +0,0 @@ -[d3m.primitives] -data_preprocessing.one_hot_encoder.MakerCommon = common_primitives.one_hot_maker:OneHotMakerPrimitive -data_preprocessing.one_hot_encoder.PandasCommon = common_primitives.pandas_onehot_encoder:PandasOneHotEncoderPrimitive -data_transformation.extract_columns.Common = common_primitives.extract_columns:ExtractColumnsPrimitive -data_transformation.extract_columns_by_semantic_types.Common = common_primitives.extract_columns_semantic_types:ExtractColumnsBySemanticTypesPrimitive -data_transformation.extract_columns_by_structural_types.Common = common_primitives.extract_columns_structural_types:ExtractColumnsByStructuralTypesPrimitive -data_transformation.remove_columns.Common = common_primitives.remove_columns:RemoveColumnsPrimitive -data_transformation.remove_duplicate_columns.Common = common_primitives.remove_duplicate_columns:RemoveDuplicateColumnsPrimitive -data_transformation.horizontal_concat.DataFrameCommon = common_primitives.horizontal_concat:HorizontalConcatPrimitive -data_transformation.cast_to_type.Common = common_primitives.cast_to_type:CastToTypePrimitive -data_transformation.column_parser.Common = common_primitives.column_parser:ColumnParserPrimitive -data_transformation.construct_predictions.Common = common_primitives.construct_predictions:ConstructPredictionsPrimitive -data_transformation.dataframe_to_ndarray.Common = common_primitives.dataframe_to_ndarray:DataFrameToNDArrayPrimitive -data_transformation.ndarray_to_dataframe.Common = common_primitives.ndarray_to_dataframe:NDArrayToDataFramePrimitive -data_transformation.dataframe_to_list.Common = common_primitives.dataframe_to_list:DataFrameToListPrimitive -data_transformation.list_to_dataframe.Common = common_primitives.list_to_dataframe:ListToDataFramePrimitive -data_transformation.ndarray_to_list.Common = common_primitives.ndarray_to_list:NDArrayToListPrimitive -data_transformation.list_to_ndarray.Common = common_primitives.list_to_ndarray:ListToNDArrayPrimitive -data_transformation.stack_ndarray_column.Common = common_primitives.stack_ndarray_column:StackNDArrayColumnPrimitive -data_transformation.add_semantic_types.Common = common_primitives.add_semantic_types:AddSemanticTypesPrimitive 
-data_transformation.remove_semantic_types.Common = common_primitives.remove_semantic_types:RemoveSemanticTypesPrimitive -data_transformation.replace_semantic_types.Common = common_primitives.replace_semantic_types:ReplaceSemanticTypesPrimitive -data_transformation.denormalize.Common = common_primitives.denormalize:DenormalizePrimitive -data_transformation.datetime_field_compose.Common = common_primitives.datetime_field_compose:DatetimeFieldComposePrimitive -data_transformation.grouping_field_compose.Common = common_primitives.grouping_field_compose:GroupingFieldComposePrimitive -data_transformation.dataset_to_dataframe.Common = common_primitives.dataset_to_dataframe:DatasetToDataFramePrimitive -data_transformation.cut_audio.Common = common_primitives.cut_audio:CutAudioPrimitive -data_transformation.rename_duplicate_name.DataFrameCommon = common_primitives.rename_duplicate_columns:RenameDuplicateColumnsPrimitive -#data_transformation.normalize_column_references.Common = common_primitives.normalize_column_references:NormalizeColumnReferencesPrimitive -#data_transformation.normalize_graphs.Common = common_primitives.normalize_graphs:NormalizeGraphsPrimitive -data_transformation.ravel.DataFrameRowCommon = common_primitives.ravel:RavelAsRowPrimitive -data_preprocessing.label_encoder.Common = common_primitives.unseen_label_encoder:UnseenLabelEncoderPrimitive -data_preprocessing.label_decoder.Common = common_primitives.unseen_label_decoder:UnseenLabelDecoderPrimitive -data_preprocessing.image_reader.Common = common_primitives.dataframe_image_reader:DataFrameImageReaderPrimitive -data_preprocessing.text_reader.Common = common_primitives.text_reader:TextReaderPrimitive -data_preprocessing.video_reader.Common = common_primitives.video_reader:VideoReaderPrimitive -data_preprocessing.csv_reader.Common = common_primitives.csv_reader:CSVReaderPrimitive -data_preprocessing.audio_reader.Common = common_primitives.audio_reader:AudioReaderPrimitive -data_preprocessing.regex_filter.Common = common_primitives.regex_filter:RegexFilterPrimitive -data_preprocessing.term_filter.Common = common_primitives.term_filter:TermFilterPrimitive -data_preprocessing.numeric_range_filter.Common = common_primitives.numeric_range_filter:NumericRangeFilterPrimitive -data_preprocessing.datetime_range_filter.Common = common_primitives.datetime_range_filter:DatetimeRangeFilterPrimitive -data_preprocessing.dataset_sample.Common = common_primitives.dataset_sample:DatasetSamplePrimitive -#data_preprocessing.time_interval_transform.Common = common_primitives.time_interval_transform:TimeIntervalTransformPrimitive -data_cleaning.tabular_extractor.Common = common_primitives.tabular_extractor:AnnotatedTabularExtractorPrimitive -evaluation.redact_columns.Common = common_primitives.redact_columns:RedactColumnsPrimitive -evaluation.kfold_dataset_split.Common = common_primitives.kfold_split:KFoldDatasetSplitPrimitive -evaluation.kfold_time_series_split.Common = common_primitives.kfold_split_timeseries:KFoldTimeSeriesSplitPrimitive -evaluation.train_score_dataset_split.Common = common_primitives.train_score_split:TrainScoreDatasetSplitPrimitive -evaluation.no_split_dataset_split.Common = common_primitives.no_split:NoSplitDatasetSplitPrimitive -evaluation.fixed_split_dataset_split.Commmon = common_primitives.fixed_split:FixedSplitDatasetSplitPrimitive -classification.random_forest.Common = common_primitives.random_forest:RandomForestClassifierPrimitive -classification.light_gbm.Common = 
common_primitives.lgbm_classifier:LightGBMClassifierPrimitive -classification.xgboost_gbtree.Common = common_primitives.xgboost_gbtree:XGBoostGBTreeClassifierPrimitive -classification.xgboost_dart.Common = common_primitives.xgboost_dart:XGBoostDartClassifierPrimitive -regression.xgboost_gbtree.Common = common_primitives.xgboost_regressor:XGBoostGBTreeRegressorPrimitive -schema_discovery.profiler.Common = common_primitives.simple_profiler:SimpleProfilerPrimitive -operator.column_map.Common = common_primitives.column_map:DataFrameColumnMapPrimitive -operator.dataset_map.DataFrameCommon = common_primitives.dataset_map:DataFrameDatasetMapPrimitive -data_preprocessing.flatten.DataFrameCommon = common_primitives.dataframe_flatten:DataFrameFlattenPrimitive -metalearning.metafeature_extractor.Common = common_primitives.compute_metafeatures:ComputeMetafeaturesPrimitive -data_augmentation.datamart_augmentation.Common = common_primitives.datamart_augment:DataMartAugmentPrimitive -data_augmentation.datamart_download.Common = common_primitives.datamart_download:DataMartDownloadPrimitive diff --git a/common-primitives/git-add.sh b/common-primitives/git-add.sh deleted file mode 100755 index 896ab85..0000000 --- a/common-primitives/git-add.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -e - -# This requires git LFS 2.9.0 or newer. - -find * -type f -size +100k -exec git lfs track --filename '{}' + diff --git a/common-primitives/git-check.sh b/common-primitives/git-check.sh deleted file mode 100755 index 8a6b468..0000000 --- a/common-primitives/git-check.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -e - -if git rev-list --objects --all \ -| git cat-file --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)' \ -| sed -n 's/^blob //p' \ -| awk '$2 >= 100*(2^10)' \ -| awk '{print $3}' \ -| egrep -v '(^|/).gitattributes$' ; then - echo "Repository contains committed objects larger than 100 KB." - exit 1 -fi - -if git lfs ls-files --name-only | xargs -r stat -c '%s %n' | awk '$1 < 100*(2^10)' | awk '{print $2}' | grep . ; then - echo "Repository contains LFS objects smaller than 100 KB." - exit 1 -fi - -if git lfs ls-files --name-only | xargs -r stat -c '%s %n' | awk '$1 >= 2*(2^30)' | awk '{print $2}' | grep . ; then - echo "Repository contains LFS objects not smaller than 2 GB." 
- exit 1
-fi
diff --git a/common-primitives/list_primitives.py b/common-primitives/list_primitives.py
deleted file mode 100755
index 0d5da96..0000000
--- a/common-primitives/list_primitives.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import configparser
-import re
-
-
-# Entry-point suffixes are case-sensitive, so disable the default lowercasing.
-class CaseSensitiveConfigParser(configparser.ConfigParser):
-    optionxform = staticmethod(str)
-
-
-parser = argparse.ArgumentParser(description='List enabled common primitives.')
-group = parser.add_mutually_exclusive_group(required=True)
-group.add_argument('--suffix', action='store_true', help='list primitive suffixes of all enabled common primitives')
-group.add_argument('--python', action='store_true', help='list Python paths of all enabled common primitives')
-group.add_argument('--files', action='store_true', help='list file paths of all enabled common primitives')
-
-args = parser.parse_args()
-
-entry_points = CaseSensitiveConfigParser()
-entry_points.read('entry_points.ini')
-
-for primitive_suffix, primitive_path in entry_points.items('d3m.primitives'):
-    if args.python:
-        print("d3m.primitives.{primitive_suffix}".format(primitive_suffix=primitive_suffix))
-    elif args.suffix:
-        print(primitive_suffix)
-    elif args.files:
-        primitive_path = re.sub(':.+$', '', primitive_path)
-        primitive_path = re.sub(r'\.', '/', primitive_path)
-        print("{primitive_path}.py".format(primitive_path=primitive_path))
-
diff --git a/common-primitives/pipeline_runs/classification.light_gbm.DataFrameCommon/1.yaml.gz b/common-primitives/pipeline_runs/classification.light_gbm.DataFrameCommon/1.yaml.gz
deleted file mode 100644
index 0529242e4eaf1f8f6960975ed4703be8c5041fbc..0000000000000000000000000000000000000000
GIT binary patch
literal 0
literal 8605
[base85-encoded binary payload of the deleted 8605-byte gzip pipeline run omitted]
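The `CaseSensitiveConfigParser` subclass in `list_primitives.py` above overrides `optionxform` because `configparser` lowercases option names by default, which would mangle mixed-case primitive suffixes such as `data_preprocessing.one_hot_encoder.MakerCommon`. A minimal sketch of the difference (the INI content is a one-entry excerpt from `entry_points.ini`):

```python
import configparser

INI = (
    "[d3m.primitives]\n"
    "classification.light_gbm.Common = common_primitives.lgbm_classifier:LightGBMClassifierPrimitive\n"
)

default_parser = configparser.ConfigParser()
default_parser.read_string(INI)
# The default optionxform lowercases option names:
print(default_parser.options("d3m.primitives"))  # ['classification.light_gbm.common']


class CaseSensitiveConfigParser(configparser.ConfigParser):
    optionxform = staticmethod(str)  # identity transform: preserve case


case_parser = CaseSensitiveConfigParser()
case_parser.read_string(INI)
print(case_parser.options("d3m.primitives"))  # ['classification.light_gbm.Common']
```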
z7IUC!)HaRUrcn!{&B3{*2d?RXYZ`S;qYg8#Y1B21x~5UrH0qm1ebcCK4%;_9@J$aq z=6lnqZyNPYqoHXuG>wL)(a;fv5SkvurU$Xz#~B(3EHIV7+UZejFEthG_U~XNI*v#+QW#z9SQJAz(#^L61b7z zjRbHch$Dd<3Fb&ZM><}8U>&HDV2uQ9Bxoan8wrr4ya_E{?qC6AkwA+CP9&%yfei_6 zNU%Zz7810Oz=Z@aB!D473<+dNFhc?wlGKR2qT`YdhA4rHIvAqB5Cw)P;n6|@0HP=m zMS&p-3{hZ+LcXHF5XFq8m9k)b%wU+pfFTMDQOs|c;D9d*d{JPC0z(w?5(S1RFhqeN z3Jg(Thyp_t7^1)s1%@avM1dg+3{hZ+0z(uSqQDRZhA2=&ff~wTw2o;e?137}(E`Q? zYA9qW3TcW$o}$1P1->W{MS&;^L{Z3A6fzWr6h(nH3bavyIX0|=FA987;EMuZ6!@aR z7X`kkNPEock=BDK8d-@3Uo?oKK@<(5Xb?q9-bTP4ZhtiRqQMspzG(18gD)CH(Tevi zumFZ=FhqkPTEQNOqIsLd`=r*560Gy?h<8N1B?5ai*rUN74fbfmJ2Th=Z8T`3aigTc z9u3-P+$(9YM_b+s!aCTa!5)o_MT0#Ww9%lAM#iEY?>%7;B+?*}28lFCq(LGL5^0b~ zgGAc%))&@6A`KF0kVqrb(IAlqi8M&0K_VS^=M8({k_MMFNTfj`4H9XPNP|Q=@>U=B z`rwiQmkhXMz$Jsc$AD4>lro@{0i}%Kz5{##vkaJJz$^o188FM>zRG}L1_U$6kPLWc z6t_g+3+QG*Hv_sE(9M8u20Sz1nbF*Xfjw;S7!b^GR|f<$+{!_QWVo5b-5hYqASW`Q zjR9>8Xk$Pd1KJp5ItJ`9V2=TN4A^799s~9mu*ZNs27ECfia~B;z!w9)81Th_F9v)u z;EO@tV?Y}N+8EHrfHnrSF}UY4V2=TN4A^6k6B&@mfJ6rDF<_4YdkolP!5$0tSg^-} zJr?Y-V2=fRENEjv8wck zD(vAd%OVT1V2DLdWI+@QqF4~cA`7zMiv?dS_+r5q%k5(B7jwgyJI35HZtWRkwA?l3 zwlVjO!6geWS#ZgMOBP(R;F1NGEHWL7Ovi#m7MYF(mn^tsJuP5-;F1N2EJ$QQA`A9d zu*ZTu7VNQs_Aq#oexa2@0hfK$TOAcIe$aEZX8wW}`P|6{s)1ok+v$ALW#>~Ua^1A83U<3Jn7G8OAoEL4F# zj-{$rtqS&7tzx+fS&##l97yCqA_o#VkjQ~V4kU6Qkwb>$Kq3bcIpjnRByu2;1Bo0+ z5_fkX}@av+fdUmW=2K@^YN#)B^&eDNTP2T?qT;z1M-qIeL+Bh&HViw8A4 zavP6~#e)SNIf_?24xWJ%9-Q#tga;=)IN`wwkF3Ol6CRxK;DiS!JUHR8Z{v}%cre6+ zAs!6zpoRxEJgDJ84G(H~P{Si*@t}qWCp@we4^DWH!GjDQWbmwHvy{zRHjCM;X0x0P zPWV=J8@{l<4NiEF!GjDQ8H)!QJjmd|0uL5=u)t&Wd#rwsJj5dp@yJ6wIN`yGfIK9C z8UdUL;6y+k5boNEAS#01^d|C?J0c zAW;B&0@xG4mtff^#)ljwfIR`U3CKzUXcIu20NMo5CV(~pvDiDfWauqS{$0qhB&O#p2IXcIu20NMo5CW1B*w28=3B4`sqn+V!O z&?bU55m`wDZ6atBL7RxXo(T3tuqT2&5$uVeO$2QsXcIx32--yOB_i*MiuMp|@FgPC ziP))$U{3^lBC?_p=ejpF6DcI`aPj(mwG-yL7$+a zPf*e)G^0U@pV0J5(<>?f1toxj8bCo2pr8s+PzETd0~8bj3PZ0b3>4G?3QISr@e>sJ z399@AWq!i3D+>Jtm43o;L{$C>O8*44e}Zy9;W;8tH$2^-98gdXC@2UN)BuV=yVL}V zK)Zo|8l3yPM^n5=6FjU{Q;k{Y+P z#x1e2B-~o!#)-G{|IKOLyE79=Ei{Qn4%It0gd&1XN3K+|n4g zM8++Zu_S^d5!8|$w{*uX;c-iO+>#!*w8xT=X{nD}^5d5NSduv{1#(M*+|nSoM93`_ za!ZEXQXWeJs-;A3Ns(JxSwETjJ!FI=LlCZYh#mlH`^Oxg|rU4p~x&EU83JP0Ibr9Ey=86~~g2V@b_% zYSIufLP@fuCOI`7o3z{dGDGXz4DGKou;wPFYfnEU8A8lq07m4G}WbCrb*HQ=@ z%C(N=5XcQ`kR^r2l1k&$q#<&H(ql=raca^Kxk34{r2be^fGnv%mXsi;X3Gr@pOB$M zSyH1cDN>eHDND+fC6&pkNjJzGN|hzm$&&Kq)Z{mS$Q){tQ=GjB? 
z<^IR_wXZS{>8kc{TX)T~o^q>yQR+Ks`w-84>$BDI4Y5G)J`*qZ( z*3;r=AI7k6-Q0ReSD=UFOZWA#uk~EF`o0^}eYD^9uIF){&Br+l9!u-4QQGSZ_N}MZ z+E*j)v-NPVD-SoHc%1QkT4UHp>C0QYS#ev$Wi>i2=K1m9w5rZl$IqV4iz=@lsC`ub zH!i0iip6-k8ZAE*<@wRFJb89`!0Oot@$CB9wOP4d9Q|jtTpX;bysU0|{I-fm!b&UR zuiw=ls@4||psoKZjtB34dGX@qyLai0;_TvS;TxXeU3cPyk^0wq6sPBBMR}H&`TXi( zwzbET)J*oJWx|mj3cJb2f5z$)OtG+%(<+{~~%wPHBjSd@p zx;~-TJ)f1!@%mF}4nOVsY_w^2Up)?_zOMogVYoi#`p!T>ieo%d0d~7S(i= z)$v}jufy(B?&9cY@AKh$mX~jrzc>2XX8Mkniz-jkx4fLYUEIEl)3c)4bycoM)w;}Q z+2tG@>-o_qN6zY}La&bgnfiQHetKkc?b56mRm<|@X}#;2i#MFN(Q>w)FYf=;y|}RR z`Is6=%ChG_{+PY|@#Rl1e}4b&Y7K1~>!tUtXI^(XfywjBd2r!l{n+)BqTJ>*zbU8l zygdK!;{3LAonE-h;Onwg24(IWUgrk#XSwB_2o}Als^}MRe>2O`$x-7N` zmFy6zpV#wYQD&rv07=u0ss|xm>3Nrq^Nc9m4RwoSyYO457=4Sk5nQ_Ie$N*dY*a|M9xl zF_2yI&Hu?~zx6u`T~f(+$%o&H<+|V33fU!({6}6@)5S@aC+G5$;?D7SHCyJ@gN{a* zO!8lglu53N>}*u^I1*iw$#=8$#hz5pL(wItd^Ig*)sxh_8IWvA!wGIhQ@Pm^ht z&DXPPnq6(QA6%NdwF(f7>{bOR^Tl{M&sIe-KEBu!{HubC@)sR9yZx-w%K*H2`}(KX z?_dA><*oG>X^d9k=K4!M9%s{fCpNpv3`OaUVCl+eb~- zfBt!%Rnv4&lH6F;MUSoq>(3{1pJsekUCfZ1J(%RP)ux`x;%t_WiaQRxEYciY)b*+~ zWq8z8#~Th@uf8l=UsU39S}C)m5|`65n(a$m(nx3AzQnaJaqUZ7olf94DsfF0)m;X^ zFL6Ef64w?Jr*|c;En?B964w@C=uL@hix~8##I;2X`c&fDA`E>hacvQXewDbk2t;p6 zTwBDTM4-<V(qEprn@-!~t|{$HDTB^cr+xBv`Bs8 zsPY6<8ePgMPeP^9rBc#&l|~oIPVYq;UF14XMx)Usk9Xo*qj#`j z>a>XaU;Er|n07dGH=MS|yrmE?Jlt{L9Jss371&&mzUU}H{kfni<9R+|1f!W^8D)G zdV@z-zs&%&FhX0Vxov}K?bYl#r;&M)FE$)-znPEM#_lTf@mlQQcg~e|R&V-Yihbs@ z&wTc!;)k=#w^+~5&W|o&?aWm8W+Tlb)5ScN3JJm8*9X zlr78F&PU(zd$QP5uT#X^JJ!zZvTUv8k+SBx&mI1<@C8@WDJ)xCx8lfdI3ihC<@1#N zX2bJY@u8UUKzSrO99We_(e=3NP56tg`_&ij#a|zsuC#YJ`Xryv^LAi1`8g}{a#1h7 z^0V^1IFcI6jS^aR&+qU)Bi`jHOtr~kcSKIZQ{JHGHR}Ur`FuFe zyDqw`mHPRp*Nf>S1!dD=qdXmTwp`sjLpGcHZF=@aAy}=4mo9d-P}hR)M3DNxyU^oH zib={?VK@DH_ZJqElrb*&EsIH)*BdGKGMarcsl%DO;j}&OTGC!dbJrf;l*M>Dy2+V` zqow)(_O4{fZ5)Z->nn8Y?gKlpFMXN`_cC+v*x}_|!37LgwA)povOUY!4?s~89?g;{ zk#ZC%=ui<^Ahrh}Gl`b~nNU=mb5cL-gdQxI8_np%e!p+4eZucQFFxgBVG)hhP>fFz z7SWbP2Y&eFFoMLAr><5?mMmGa{m8MNVVk!lOP|I_CVdtrot|&fH?~6Yb4H|Ix^XBq+i(Z|k`Y_1-*f7zOjj zIv7~@>FeG4{*zaY%^SN{hF;y?`f1Q;%9Y?%#Y-xgWvr~IEF9sQJKt<6jmisIOUDbd zZHm*p;T%A>tX=9XB)CNat?P|AnpGtQ-qQ13zy#KN;w=1`N__7^# z4}J(3^;#ub)js|9@k8!&1uM2cyd$O`KK%C)Q$*|h{dK?I#S?$N z|Lh-W|F@z__|?ZWCE?kB->?64>)SBBOJ3!|lzFB~>Zi96F0))$esPstGygwH=}of^ zYX72r58RW+taL6O=N-3rfn`-aOnrxQ~AJhem8wLa7Y zVt@D$wBybfkjSuVB6?SnmpEj{>czpzlgR4H9ZlP$rD60?nrk z)CVK2EYx7*8Z;pw#rZVM0!=YZffV;oGbj@bq=xyT83zf<1Uxn1sX=3E&=VR~hgOh= z78Puc20S%rOl|Np~PS+ z=o?Bw#dr}tQuM4T=wdHIGbqEn4D%vZ9T%1|EDDww?fA4*V4DT{)QB@x_%OP%RADpF zwe5Jb9e1|l!BT@I8*G(DaBvhDxN=_Xq@wc=y55VJile|liF*-C=2$Psq7Y{$uqc6f z5q5Nj{mTp%h4Y!=+XlMMUc^@63kSn7VjAm3oH@Z)CDKWSiqLDOoMB#kBjcS{GMvvG z98@pv1N^hq59Eg^@ob4rQp$6Z=)(KAydFI#DWSWS=rd}|HTU5;&H04qC8dOJl9Kd2 zrDW}=l@gx)(&aV#v#=e_0k)QVZDPRI`|DI!TY2?6y?y!j7jIs*3A?jBfk*yXY3W~s z+3s)q(DsZ~UwWuDgpxLo?n8SvjhHaJC;Xla@p=2=)tmOH+tYDA($}{Qq>Td(nufOx zEhMqpFh-qaLTFtXsmc(~kQ?h*r8u>`Dr^;k54ht6*Vg1!E-IyD#dm-;VFS{tveG(Z zvNoBhM49DWlB_5=$qVYFsW&?r9WK8a$^)@jf-7qVp4SZjau2)VI1J7%V!0>AI{d=< z%Kdb?ji*}+V)vY7_;us1_jB>vypf!({)C$V4^HCEFC4h~m>2HjHyOWeLpPfHC_dbh zUG#MLbt9<5uP3tm_+T2VbyO}6=o5o_Y%G-$^hjI#LPR@J-+1R`6u>?I9>Oz1pas%Z_>*CjK%6{$i|nQ`w35lF11uG~U2vL3kT% z%dxKaU!E`m1bPApdqcQHB=&60p-}WqTMijFD+oK?xb-+h$12inbKUE*D!ChZw9xv=e^)tIqdGvSR52G=wTa-s&G31)Dx!j#R0)`>C z;?Cm6g)XL0}W z7$0hgzd4UyG0wXbkZr=X1017$k-`lQX){cnlOne99T! 
zVmtr?O#M7?!v62q`xb!jdflYiV zHC3#4E^!g7g>n%iCeGrxh!I#UiHjHk!(zCI z5g06nix`2ylDLQwFf56S7y-kwxQG!zEQX61fx&{fh%qQEjEksY4>Pe58|Yz%CEjCSE$bJY;Kl>Fn~5E#WI*mxpWy zUje&3WE=P@*ySNRyh~@7hlCm|o?IRhW^gs!@(@4rnz-d5zT~xV%R?N>tKpW1c$Qbg zEpJ@R^LE^n-CaUDYR63pJ^79s6I7qR`?+v%aikw}-rCAYj zzK&Kz>M3-zB2<`Zb+jN>w%25yqN4@TN~Aj@la5ZT$g_W;?Y{~5+WC=5P+1aFG6^7y z;Y=n$V=?T>Bxo#&N0|hWB{3?K0J1D@WfDjh!?H|*#&YP~wmb%A8eEpe z#7u+9!WfxpKv@_wGYu#s7#hW_p5Q!GU{mZgGk_-KLo=)+{U* dJ>Oz6YV-uhEyHVoV5eKG{{urUBLcaj0RWb7lL-I- diff --git a/common-primitives/pipeline_runs/classification.xgboost_gbtree.DataFrameCommon/1.yaml.gz b/common-primitives/pipeline_runs/classification.xgboost_gbtree.DataFrameCommon/1.yaml.gz deleted file mode 100644 index 1bc8198f799eef81f18edb2757c7567243eccece..0000000000000000000000000000000000000000 GIT binary patch literal 0 KcmV+b0RR6000031 literal 8575 zcmV-_A%NZ=iwFpDjU8S912HanVQp*x?48?l+eVg$-}P7UEi(_Y`+m{WdNNMp+9Z>* zo!P2vZLvTAlsMr<1%ghTzkZv#*!W;khIvUOh<c?+Simc9-WxYH*IUN*}^KzNqv$iaY(iPb_D|9)|qVX~qG0Lnoh0S6S^5~om zZfr+oW~?^RN1Gevj2BMkCxgMHI2*`dhojW_D4R})H+F_^U%z(f#sO`pCOUwV2P!d3}~TEa&Hwc{zMnR?Ep^el{?=D0D8Zib~~$ zOpXZcb(uS5lVjP)j%}$&LEQLKdTXp0+f#3&KTU6j)2t}Rr-fBM1ZSO$E`78r$7NN{ z^K!U@y{cR;R#jdu*M1KM`Q_^RkM!qazO1WkGOw4nFI~MqTwY}9s1!1x%*`7``EK%| zkHO${FwbV?+2B<=f78Fc{`=tTMOFUo?GJw+O#ErE&i*|3`lp`<|22@$g825kKaPhz zjKijj^GTjfhnMO6uBY-K+`Msl@qWp7uFw3eoGq&Nx2L^c)afHPuMK2cU?wUr#rm_q zEvjG3YPg=>xh^sj^6b>*GEMJb%G0QQkrr33y-KfHDNoiP9OrhFTUDIenB}Kdiu`mG z#_H5snPomp|LO6`<>a!QrrE!C;+6?2Ez3MgDO@>r&W%PQ>T#ZHoeps!x?uCNDDrVp zxadctQKpKK)z`5lH|x+8CQWfGPL*})G`XB7N5{pfmvKBwNXd=PPcEy)Xj;yi z=B2V;*{saQBPj||+EMUYyE5h`I#tMoDxHm_5qhlD*vir+BSB?$tfMwc-i4$aGbdFx zFBY@mGC}WbAWo{)d~JEM94_nhPF5Ae4*?m})v7#MpS;w`+2EU3Z?D(;YI?h()7dO0 zd7YT?Vd-9!2{qNng{>OuR?Xg2vp3!B zO(QPNu646(-3*g%#z{Ag>gITK(*xb~z%-4Trcr}a)3jrnc1+WbY1*;P!P%xA+q7ew zb}Z&V)2MA4wN0ZIMw^3kO%Ght1J^X_nnoRFUel;+8g)&hu4&Xajryih-yF7Ydf=NL zc+B^vQQtJ`n?^&^XlNP@O{1Y{G&GHd=CDK4gMcO1^dK}nh)oY-(}URbAT~XSO%Gzz zgV^*SHa&=3^;q@->t0~t3xk$L3m_1*&>W;}VFvcd772S2_6Q!r2DFjDja1|s#s~aJ z1?vjdK_CePNljRyPk={)JQB1?*Dl^}1S}+IA%P1CUPu5#f*2CWkYI)cG$g4Jc}2%19Sl(d7j-a1fguVEQNp8z1OP-) zAc_J*6d0nw5QTh2fgy?+ODko;_?W>kg#kkp7^0ZpFu?&|6!@aR5Cw)P33i@I`|!8hp{8YI#n zkp_vh=dCZSgG3r6(jbvWrlUb34H9XPNP|Q=@Xj0dz$FbXX^=>RL>eU0Adv=%bmXl* z?)AYX11=eG$$(1+d5;053@BwlDFaFw!F>n#0%jR7%Ya!1%ran>!F`nh!3+pykRciH z%qVV&z!%WXfNlnKGoYIR-3)kUz%!${2LpT9;4vVW;jRt{X1JAu49Rdahr2o8l0i;n zKpO+v7|_OmHU_jY$aD8Xk&2CWxyT-_873oASW^)kpYPe*kiyR1NIoO$AUc;?6F{v z1$!*mW5FH^_E^xyf;JX>vEYjZUo41XK@~$$>-;Byz}!97yCqA_o#V zkjQ~V4kU6QkpqbwNaR2w2fjG)#e*mwxs3;3Jow^46c3_!5XFNi9z^jVibtm7!50r| zc;q%78H)!CJaQDTcpN+fCpNb2~eH)zcAcF@PJTev!GI)@|g9RQe@L++*>i1av9(jmI9^#RQcyPjl69IWh z05t+Q5x|LnJS2b`0n`YfMgTPeI1#{!08RvC9s!w007C*85|D=k;bBDaaiZ6eqc z!JY{AM6f3!(}`eD1bZU#o(K{}&?bT}5qyatN(51&XP+1Y_!7aFh`XMMY$k#>5wwXQ zN~GqFqC2YYD7&NXj>0=C?Cv<^j&Xi4!}TD+DR5BW`MQk>UP=K(XRNqrtAd4ife zLG`boDo;?BC#cI4lmrXevdiI9sV6Aa6V&PniuDB5dP32zqFu`O1oeAD(=PRVf`UFl zMW3LgPiRJi57bx?P*6N5s23D1nK4<{5*ka=tR*#W zX^mTAV@bHR#K&#*aa(>&H^{oS9=Rn!ZfTIox|Z&^B|L6vjiq8*pTED5NV;JBqR zZi$RrDq~3mNg}8vJ8tQYTf*a(^0*~EZfTDtA=6SHx8%ny{jnr-S_#=8Bkla!vw>|4OdYbM5Lr@*oSKyTlQ|S4ODc{fCC8GQ z zfMgCe%aSr>Nu9E!7+F${EGb7$O&TI(s85y@D5oav5;D{%ONx{wRmzewWl8n1r2IHF z*_CS@%OQ{()F4X=jU|=FsYyfR2BpW6YU9+TA##KAV@dt7qySk`fh;LOPR*7Z96ljK ziL#_dSyH4dsZy4dDN8DoQ zvZPQsH65FtO?pKHY2KGnyez3+mXt3`>X)T%eUXaH(d{o*(NOEFRl3zPNA!U?>+jWm zCBJIC`IYE-OQCi6v8yLF+J|&*yC3&(tE}yU8xV(m*d#GwDz-7@9X8U#AQEz zK9R=d^!C1 z{_NF%q|KK{=@_#vt^P%+@1*TRJdds0)-k&KZFP@l|H#*eYrt_m?6-A@oaoO!M}PMG zJkG%`t%pR7zPFAuvPkbBTb)5>$r~k)OuR{?86w2 zt-D(f=?e6aeCfU(j%+Sg 
z*Ok9Eo{weLJ*2Tetyce{^p{tGm-$6G%gzQDb$z)!d;WY@*4g@j+RxVi71iWjIWH#5 zeDSWV-alJZ=g+SXSU!Izp5H#Zc37?E&;GMq%ukkeR@ECGzb&H)Ot zzsj@0o1ee_`sJH9>5cO8>S^JdpW!WyFhcyY9>vA`%d)!6s%&=iFx$1q&G|`hm-lF9uBWwx<4Y;YJ1by$Ee)48j<;&PeFUP!Kd33dfW45wJ25} zLv#Ac2d)A(nU(Xar;|5cOnsO0^ZMe9zh3ll;ML-{C0<^pnKG{@`EVWYCHp$o=vY+k!GE%SO&eK@V#p1F9(dCM2m)ogzFQ}^|iolnQqK+Lv@J8x9R`o@NzwUiay6ecZkJ{dN|7JUQY?rAq=lq{azNfLkzx~ z^m`t-4l(%WVwDn@UWdVV2*cZIa@q4Rgf1&$F}u3i>vbSvhd{jk$E#k)Kz7MD|0kRN z+V3cINhRMTAAT(ttA1ZAWS2bhA6Z>b=I6sKIhUQ6TgT&@=_0EibTqnTlK+~gOmbNc zFY~&`k?4|4zL~DB_N00qiY__jHR>)QG)F>%CmoW$`wv&dZ&q>K=NiR^=zX?DhOzr_R^sX*{Wiv(>bo3~x5t zPp-|~TLp+hcB=wZ*}PcHhRd=n&aU@)MS1seA2z((M~zp1{PBKRPtrX} za$~tJdUQ3o{(Lg`amEkps~K{)2jgtI+|*N5UQV;T+;ZSmndabpU9U=0hDY6Wyy3vr z^2?(2MI|nywB1xr`qC0t>Px#ll(?cWaZ}mtSmHXCxQ-<*y8aiHxF+*@o53GTTu;5k zwZp{eU5RUlSoEpHwL=(sQ{vho2E8e9?GS@LmAG~YL!U}qJA|QMC9WL;(VG(24l(Fa ziEEcA^sK}+#@6_;$hBKM`c>xIEfzg1bnO<5o|U?G3&ye7bu4xri(MUyT}F8uwkUS3 z^OXBL7Dk$V7rUI28@_DxnPS&{C8O=CU9+stFNV{_`6TBN+5G*6L+0iAdi0$gJ7`fU zYPYd~^QA?l)R*7K_okxtQXPv*$D-0_&)i+7?Qz$Xj-`}A=c>~&dAokA%VxnbciXy$ zL+5U4x6~~fq5Kx9oBCYp7L^Qgzp0xN!g;y()a}tHVcx~CuO!U7p!Si3#oNUFn1mgZ zuzSy(z2m$+?kw!-s5H7HT2Do#(It<10xFFz<(9syG`f^n`mWOGQd)TeDvd5hl_#Lm z=u%F35-N=@m6E=zG`dK3dN0!GBG-8`8jUV_wcH_b<9K%eq4=do-c~4OXB4u{}njC@#6f&kKc`-pTD^Ix7y&* z&2KXREsW5PX>QwKT6=5uyr+>_nawvGaM;YpS-qzpfmrt?I;PL}FFR+9KW0A1%;#7t zKFn#n>rTAeR^cZne7>4pzJGQFYiG8J#es{)k2oraDIY)LfE=KZ{D`A*fRgefj>ch% z%a1r9hbc2Z;(#2g;QWX~a)8qFBaX%)iqMZZ90w~$KjN75uq?fwxb&+$y`Px$tW3S1 zp!BR5cMcMItve;8!r-=7=Y?Rw`+1fa}ZQ1&s9qucxq*GY6wr<5UyWxmr zVVTWR_B$NCpO){+DG!v-M27?Gsw}%6ce@FHwROMz!oB$0gVU9Ei=)r8*(_@ZW|NjCWwFe&w3IFv%e#ZR z`MA~SdKq>-U|mJoYVEBL+-BW#Odm)0oJ7eBX*>OlgfpN|lxt}Z9 zx-X^oaV3jRZ}RDQB|BcpKAt&s$7y@qSF+HF<`k2jWmiFQ)&KQ5zyJN+`+xrHkE?+5WBtYFk$FuNdia-_ zb4_Ukvrci7tH!cM7PYfd+tM^vIjI`q%Cg8CVMh{JDgN3G?DOpvrC;7HBurk>Yfp{YlC z^ZN1r>%3ge-16b;s@pyIA=J`qm1tJ`^qa?zxqmBIX|Y@Z&G#U zq^cE0rJ^ zbEV^)nYuB?6a{m#DN8ANo*Qz8bXiwb*;JL!rYMSBRE72m!%JXe)GH=S{&`#cExUmD1F%RL+Qs=S?p2f-5E)AsWqVvF_ZZ69yEO7`C2yjr(pO&L6VTr8TtdHede57)mhwT8DY?Rl2DPVQlxqW}8z{lRlNKbH{RZZ{;V8v<8qnbebfkgVZ(zI_nC}Kgj{&V{ zpzTIM2^vZ;kSFx50nKL!%7dOZ4oYxw36?RC;`c0!0?RSYfE2gS3dj=-q=oU~KmS02 zJONJ&cv{ey7W9OL*%EAn*b4%b zxEC>JiRDVn3UL++vr_mj!j8_c|Cqt7a6AkA+CbOYi&!iC;1JkH!eY6Ivt;7mpRO1gM-6WX(^#2LeT z!tdD-pLf@<-?p2?o{jS{JOs|P!BKI*!J^`AMGMWGB_Vk2gi_X4q)i#(8FF&9sEp)J zR)woX-Bhxcg|v?HDp!>;x{~V!(?!^TcBZVX&4g|!Qt8zdxv89ZO2`2#^Td= zBRQM>31{f-9mLCD*gtib7xmqf7oXN)JDU3tKH8CX?%$hfw;e$pemf$&4|gVM8|RB1 z`b?)D>`Nxafx7Mr5#5UVUY4`#!9|Ddjv-!@w{u6#2B9Y*;EQJN2_|0Fki;{*2 z2S40|-eKX3B2EkqUsmr)c=)njJ-S#v)5W{O#ScStdM|Hai-^=szolJZ-)9%t(3iZ217zpJ zjN^%2@jkLkYkS|Az8{TIU0T}%i!KLrjpfqX9x!w{Pj(cS*7m@l%Mr4pxU{wh23<~# z9m}P)Jz(f^QtVhRt?dCrmm^}wb7^f4Ai5k5JBmwd`}j~@{LOLPTH6POF0SN=uC46@ zMHfGET&Dx~L8FT&Ii53t`ykQ9nHI6dF;1c%7>SigtVCiZy08)? z1f7GGu%zsnSqa5tyS4H-R>F~^#7ZPq;!wx!y4`5pq(oYapLM1!lJ#FEb@|#%#{c9T zhBp49`xUp^B-@{C|G|g^y`$g8ZTv5XUG8GvFNIz1 zV!khcUGB0>Jau-t%hK@F+2t-v!WY0UcUcU+0Cu^{GVn#P%UxD@r_L^S2{o8Jx!fhp z;9|JtE`H=Cam!tN$xGpuyEv2=!!395EH8#zUb>hkt+**>nK;9Wn_`mOxmVm2ael&z zn^KOa6}PnFc5uaQN4Jf8!7Z^OC+9FUE5dr{XsOl*Y4_66l1p`F9UYF0)W_>+Yn-)7 zM<*SfSdl@xahKEC?W?%sNx(0M9~m(!!!RWy24n!vWW;C;z@Cg4jbV6{5d$&|qcUPZ zhT>L649Ng2%ZSk!f^Qiy9D^}0BgSNc49t0OnHCds9!w_2$eah1i7_+h0i_Q^qXl>8 zSp8EZ_?cJ#NJ~z~ul|uDllDWU)xX5h4A6&DWoX{a>*}`LRhbt;(^$vM88)s4N`=n7 zah2Rj$7vE-m1vrzs1JAC

diff --git a/common-primitives/pipeline_runs/data_augmentation.datamart_augmentation.Common/2.yaml.gz b/common-primitives/pipeline_runs/data_augmentation.datamart_augmentation.Common/2.yaml.gz
deleted file mode 100644
index e449db8be24e8e87ef231c9db3bb0f53d63caf39..0000000000000000000000000000000000000000
GIT binary patch
literal 0
KcmV+b0RR6000031

literal 95641
[GIT binary patch data omitted]
z$#>SiyZ>6=Id&a)aNjsCUwjGV&%XH8+k3F({;THig|;t#H~xMD?!P4di_({04gBTy z{D5B%eE;8fe?{Q4ljFYOr@#Nb-#z*Lc$({jFOQnb*MEMoYyadIIv;Un#c;pvtD^7! z*V9kF*(cvUIQI4TPygg6|MtgEo__ktlTV*~^x$iUpS=J6yDyKKcOM>pUk&2@H;RA# z4W(cI_vQCW|Bt`@hkyCy<=A}n-#+;BPk-|7_g~EY;fD|Y-z^9Am;Zh8@n`RT_~iFb zKYZmgUfrDIGym=5_uqZ*$%h|4__EXQ_`c~i@BHOZeep%Tf4Dyle)7Nn&o}??%dh-> z_uXgDpS^hSHR0dAU+3MOKlt49zWo#X_s{KCpZ4dr_1nG-{;TgTKkt!mEBs>LJ^SeC z^Dn;i`tldY&wKj(4=?`hXFnOQ{LPmm^y$a{@!2bX^{2mW{_8El-#P#L&p&+f>2E&% zkIQF$`91MJyZ!UN{0E=?&HuamoBJL=kfVpfAih;508UK?qdHFyZF72Km7co=YP4q{PQn< z-nX~=SLYw_?z?~c5C8D)FaP0}|M<&)`rU6IoTuM46km4rv;Y3|kAH6eAOGXAU4L=c0!HDBbnxR2!f)>{Gk>o2=bwM{$sgbO;KPrfym+9i zJYo`uc@mIh5)&KbAUp?lk@bHW0zq~N~>8*`-j>~iGI=1%Zc3!sdtNiw-FY@uu zu6rQ;iGxk%Kr11vtB%X_p{?jyAO%SY4Nk4 zEPMDZw=dIn&G(moaNHf@~p;QW!_dV%`({P9)xc>3ALpMUz^)6Xua$)7%H@hSU@FYix&^VLs!@L~6N z2uE9M?^yQ12lrq0aql%hdHN*YTgQFJ@8g4qSCVA6cgeRXqKc=|*>c{NF2bS#1efVt|_s7_cmu!1u zpWktn|MnsHvZ-URfA#$Rr~e5KoTH&H&-E`h{a4RlJpIGdPaku1d=-Ju>GwO6@8ieh zVF>=N@p%aZ;zx?JO{`I&2{a=6n&;Rs`BNhMiKmGQ5@3-GQ{m(~@(`!1c{f1%s=;@Q^Uyao- ze)F$?_b>nOcfb4PKmPjX-+QS3{{FsAaIE5nvHI-ukKPW|Pk)lP99B>M^X*Xm^e1)8 zP(6El0v>-gZyBnWlhtpYet6tY{=cXH?emYGz5ndRAK$p~S~rZ>%L(g^AFh4N;q}d= z^~Mj^xaIcY<;?YVyl%OLcrbna&u7oy|LrGFPYdpE$VlB1-d|2!Z}33f6yLv@vfki< zx+S~6oU7j8fx0ERzZ|4rJp1(cq0i21+;75t^SC9pf3?2w;){nGzy9>;XPGxke zlRc!qjO|~YW3N0^{q84+%QA0!3+&H4flT}IDd59TdF3;%&q%-ffnd5^!9<|mw)y4f#ll< zl5ZbKzI`AmGhS~WNWOg_`SyY2f0^-m`#|#T1IZtFpx!=^ym6r3K9Ia|px!=^yltG` zK9KwiJdoUFHs|9XNZ$W8WB3pJKyv!s-S>DPIqz((AM$~u%=4HZ<$>Cd_mJn&&t)F@ zAnm{UGnhB}Vawa6Dr`{RK0f)LpYVe`K6(3`079&VRd{|8_b5zv6QK!{4}hyPW^kpnPz; z)Vy8L|1Y$l|EOK_c1izDS<-*hVbXR{|52mTc3J;XqtbR^|52mTdujhsL(_V3|4}2; zdU^kGk+Ssy|Kog9T3#srX z)>!>#^ zzQ1Y9sJF!2H*Fd9rl|XdEu-F&bKkUO)LT;S8?}sjOTc~emQlY9xO^G){srTm+a9}& z`sMr0F@D%(R2yR?e`nh6mdD#=)VIs1e|{PD+xFWVy^Q+EY3;_fShZ!;A7NB37h1h# z)E{9?u2)*YWmLIQ`HL*0%8kljWEoX%RQ__ysB%N|7h6V^8=1e@GOFCj{Kb}0=yCMfcw1`3_YvX53pe9vFZm!7U~_Z zuvvJ)aPO>$w+n`E7YzU0j!`HWD`26#aKKbK2UtAU{zR!@fT{`?hMyBr~ z;tw)BZ5I=NkkM(osQ817PTR%BA7pfTFEaihL)3b)@dp{9){Blm$OyGweEdO1sOci) z50egiE=K+^W7Bg{@`o9lo{N(|%-FPCr2Juq=9b9(@#il-`TWJZr+2!3wcDmP+|7l^{!{e3nX(rBXgiCCE}KpQRFHsg%!B z39?kmXQ>2PD&@0Of-IHtSt>!6D)}r`AWM~emMV~?N!$WkSrr3z%JlFw2FvQ)`u zsRCK5yzvEH&k`)C5^- z%4ewwvecB%QWIpUDW9b#$Wl{2OHGiarhJx~AWKd8EHy!vTJl+Hfh@J;v(y4vYRPA* z1+vtV&r%CysU@GK7RXXdK1(f-rIvh_S|Cd;`7E_SmRj;zYJn`ZUc)Rxau z8)T_*Cm*I>ws2Fw@^R{A3pe#EAE;ioa8tkXk?Lg&H}xwYs$RBmQ@`@D>SYTz^(!B& zUbb*kzw*)QWeYd;D<7_2ws2Fw^6~0r3%B$;^z+qCi2FDF>gTJQVD@kN)z4QqVe8-Y ztDmoK0@T0hS3h4N6}ZtLA>;#52gO-GVAH-1~`jGk{lKRl6)CaNDhd!o0h^9XDIrTw2^`Q@{48 z>Vv52L!VV2#8n^qu=*gf`p~D<2eH+MKCV89u0Hg6^+9~~p%1JNBCHR6Vto)}edr_W zgDC4mpIINoSs(h)`XJK!(5Kc1vDSw^wmyiqKJ>ZuLA>>$53Ua)t`B{3eGqef=%ed{ zsOv+YT_40x1a)L!VzC#9tr!0Q(>U`_L!Y2Qk=(KEghT z!anpF_CXx>p%1YSBC!vBihU4^eduHCgJ|qSpJN}yV;}k;`ye9w&?ngkG1-Sc%07t7 zKJ;1kL0tBs53>&{=;Q2z=UK8Vsj z^qKZSoc5s)wGSe-4}GeA5UYLYW9@@z?L(hyAH-`P`e6GYV*Ai1+Xpe*hd$aqh}u5% z+4e!)_Ms2A4P2X=f;jcGLIU zjri@R@3$NA+fCnZH{!ROzTa-dZ#R9v-H6|A`hL3+zuolxb|Zef>HF zP2X=f;jcGLIUjri@R@3$NA+fCnZ zH{!ROzTa-dZ#R9v-H6|A`hL3+zuolxb|Zef>HFHFP2X=f;jcGLIUjri@R@3$NA+fCnZH{!ROzTa-d zZ#R9v-H6|A`hL3+zuolxb|Zef>-+6a{C3y(+nxCBuJ5-y@!MVBZ+GIiyT0G<#BX$CMUEgna;-+6a{C3y(+nxCBuJ5-y@!MVBZ+GIiyT0G<#BX$CMUEgna;-+65 zTezuT{d`6IcGvgYo%rpp@3%Yg+g;ypcjC9ZzTfV|Z+Csa-HG4s`hL3;zuooyb|-$j z>-+6a{C3y(+nxCBuJ5-y@!MVBZ+GIiyT0G<#BX$CMUEgna;i}CeZM`3-yZsYdl0`p^!@fAetYQq?LqwZ(D&Pe`0b(Zw+Hdt zL*H)?;+e6=P z58}6nzTY0iZx4OHJ&4~P`hI&5zdiK*_8@+H==<$K{PxiI+k^P+q3^c`@!Lb+Zx7

i}CeZM`3-yZsYdl0`p^!@fAetYQq?LqwZ(D&Pe`0b(Zw+HdtL*H)?;+e6=Pf7!xK{p#l{;+e6=P58}6nzTY0i zZx4OHJ&4~P`hI&5zdiK*_8@+H==<$K{PxiI+k^P+q3^c`@!Lb+Zx7i}CeZM`3 z-yZsYdl0`p^!@fAetYQq?LqwZ(D&Pe`0b(Zw+HdtL*H)?;ig|U{PxuM+mrb1sqeQZ@!M11Z%^X4r@r5w#BWc1zdecHp89@!62Cq5 z{q`h&d+Ph`N&NQI_uG^B?Wym#C-K`;-)~Rix2L|}p2TlYeZM`4-=6w@dlJ7r_5JoF zetYWs?MeLh)c4zy`0c6hwig|U z{PxuM+mrb1sqeQZ@!M11Z%^X4r@r5w#BWc1zdecHp89@!62Cq5{q`h&d+Ph`N&NQI z_uG^B?Wym#C-K`;-)~Rix2L|}p2TlYeZM`4-=6w@dlJ7r_5JoFetYWs?MeLh)c4zy z`0c6hwig|U{PxuM+mrb1 zsqeQZ@!M11Z%^X4r@r5w#BWc1zdecHp89@!62Cq5{q`h&d+Ph`N&NQI_uG^B?Wym# zC-K`;-)~Rix2L|}p2TlYeZM`4-=6w@dlJ7r_5JoFetYWs?MeLh()Zho`0b_dw-@o- zOW$uV;xO@3$B6+e_bX zFXFeCzTaNNZ!dkny@=ml`hI&6zrFPR_9A|J>HFF@!Lz^Z!hAv zm%iU##BVQszrBdxUiyA}5x>3k{q`b$d+Gb_Mf~>C_uGs3?WOOx7xCLm-)}GCx0k-( zUc_%NeZRek-(LEDdlA3A^!@fCetYTr?M3|d()Zho`0b_dw-@o-OW$uV;xO@3$B6+e_bXFXFeCzTaNNZ!dkn zy@=ml`hI&6zrFPR_9A|J>HFF@!Lz^Z!hAvm%iU##BVQszrBdx zUiyA}5x>3k{q`b$d+Gb_Mf~>C_uGs3?WOOx7xCLm-)}GCx0k-(Uc_%NeZRek-(LED zdlA3A^!@fCetYTr?M3|d()Zh6ws2Fw`uU3Z?WOOx7xCLm-)}GCx0k-(Uc_%NeZRek z-(LEDdlA3A^!@fCetYTr?M3|d()Zho`0b_dw-@o-OW$uV;xO@3$B6+e_bXFXFeCzTaNNZ!dkny@=ml`hI&6 zzrFPR_9A|J>HFF@!Lz^Z!hAvm%iU##BVQszrBdxUiyA}5x>3k z{q`b$d+Gb_Mf~>C_uGs3?WOOx7xCLm-)}GCw@-b)eGig}J`0Z2QZ=b|(pZb3L zB!2tU_uD7&+o!(YK8fEx_5Joq{PwBuw@>1?Pkq0A62E=w`|XqX?Ni@xpTuvU`hNQ) ze*4t-+b8kcr@r4liQhi;{q{-x_Nni;PvW;veZPGYzkTZa?UVTJQ{Qi&#BZPae)}YT z`_%W_C-K{-zTZBH-#+#I_DTHqsqeQ>;p`y_t* z)c4yb@!O}q-#&@oKK1?fN&NPy@3&9lw@-b)eGig}J`0Z2QZ=b|(pZb3LB!2tU z_uD7&+o!(YK8fEx_5Joq{PwBuw@>1?Pkq0A62E=w`|XqX?Ni@xpTuvU`hNQ)e*4t- z+b8kcr@r4liQhi;{q{-x_Nni;PvW;veZPGYzkTZa?UVTJQ{Qi&#BZPae)}YT`_%W_ zC-K{-zTZBH-#+#I_DTHqsqeQ>;p`y_t*)c4yb z@!O}q-#&@oKK1?fN&NPy@3&9lw@-b){bdU`^{bz+h~GZ-{q{-x_Nni;PvW;veZPGY zzkTZa?UVTJQ{Qi&#BZPae)}YT`_%W_C-K{-zTZBH-#+#I_DTHqsqeQ>;p`y_t*)c4yb@!O}q-#&@oKK1?fN&NPy@3&9lw@-b)eGig}J`0Z2QZ=b|(pZb3LB!2tU_uD7&+o!(YK8fEx_5Joq{PwBuw@>1?Pkq0A62E=w z`|XqX?Ni@xpTuvU`hNQ)e*4t-+b8kcr@r4liQhi;{q{-x_Nni;PvW;PeZPGXzkTWZ z?Th&BOW$u_#BX2ve)}SR`_lK@7xCMdzTdux-@f$y_C@^mrSG>d;wJ-)~>UZ(sU;`yzh(()ZgJ@!OZa-@b_7zV!X}Mf~=q@3$}Fw=aFaeG$KX>HF=A z`0Y#IZ(qc3U;2LgB7XbQ_uCio+n2uIzKGwx^!@fl{Pv~qw=d$iFMYp#5x;%u`|XSP z?MvTpU&L=;`hNQ&e*4n*+ZXZMm%iV=h~K{S{q{xt_NDK)FXFc^eZPGXzkTWZ?Th&B zOW$u_#BX2ve)}SR`_lK@7x4j}bzzFzm%iV=h~K{S{q{xt_NDK)FXFc^eZPGXzkTWZ z?Th&BOW$u_#BX2ve)}SR`_lK@7xCMdzTdux-@f$y_C@^mrSG>d;wJ-)~>UZ(sU;`yzh(()ZgJ@!OZa-@b_7zV!X}Mf~=q@3$}Fw=aFaeG$KX>HF=A z`0Y#IZ(qc3U;2LgB7XbQ_uCio+n2uIzKGwx^!@fl{Pv~qw=d$iFMYrLWeYd;tDmok z-@f$y_C@^mrSG>d;wJ-)~>UZ(sU;`yzh(()ZgJ@!OZa-@b_7 zzV!X}Mf~=q@3$}Fw=aFaeG$KX>HF=A`0Y#IZ(qc3U;2LgB7XbQ_uCio+n2uIzKGwx z^!@fl{Pv~qw=d$iFMYp#5x;%u`|XSP?MvTpU&L=;`hNQ&e*4n*+ZXZMm%iV=h~K{S z{q{xt_NDK)FXFc^eZPGXzkTWZ?Th&BOW$u_#BX2ve)}SR`_lK@7xCMdzTdux-@f$y z_C@^mrSG>d;-+7S`0ZQYZ{Ngk-}-+0CVu-+7S`0ZQYZ{Ngk-}-+0CVu-+7S`0ZQYZ{Ngk-}-+0CVup>5IRvMXzxdLgUm&|_oKQ&<|5+zQQaVO z5%c}1ZjiZ%`hHY5$XvvIKdKvKE+W4l)eSNivERp?KK}lohub=CkhzHeepENeTtt9B zsvBf3V!$8O4Kf!|;E(DCnTt5^M|FeDMI`v6xIRvM2=PaCgUm&Y_;II?#6RfawvHQQF5<)=)eSNik>Zc)2APXk@ke!o%tf^Lqq;%n zB3}Ga-5_%jG5)A-khzE%e^fWfTttmOsvBf3;>M3VeK`I>54UyPAafBr{-|z{xriQr zR5!?6#E(C!8)Pmb$RE`WG8ZxAkLm`QizxC(b%V@B9QmWVLFOWo{88N?a}i5^-05TT z4|=$*;|7_Fc=AVegUm%l`J=i)<|3y2QQaVO5mo-EZjiZ%D}Pis$XrC0KdKvKE@I0c z)eSNi(dCco2APZa^5ae)n19g2Z5=nrT*R0^svBf3qRb!F4Kf#T=8x(InTtsCM|FeD zMXdRwxpFgS_WG>>*AJq*q7ZK=>>IRvM81zSVgUm$~`lGr*=0XnrXyDKf z4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9?Ko0$A;Ls1`(2oWV{Xh==XyDKf z4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9?Ko0$A;Ls1`(2oWV{Xh==XyDKf z4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9?Ko0$A;Ls1`(2oWV{Xh==XyDKf z4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9?Ko0$A;Ls1`(2oWV{Xh==XyDKf 
z4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9?Ko0$A;Ls1`(2oWV{Xh==XyDKf z4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9?Ko0$A;Ls1`(2oWV{Xh==XyDKf z4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9?Ko0$A;Ls1`(2oWV{Xh==XyDKf z4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9?Ko0$A;Ls1`(2oWV{Xh==XyDMl z?BTYK8)Poz(2oWV{Xh==XyDKf4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9? zKo0$A;Ls1`(2oWV{Xh==XyDKf4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9? zKo0$A;Ls1`(2oWV{Xh==XyDKf4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9? zKo0$A;Ls1`(2oWV{Xh==XyDKf4*ft5{b=CO59H8~1`ho|4*h80&=2I$j|L9? zKo0$A;Ls1`(2oWV{Xh==XyDKfM2-t${;t$f36e4!t3V-WoXch8%ip z;Lsa#=&gZ6Z^)sy1`fR;hu#`E^oAUIYv9lua_FsrLvP5Tw+0TqA&1@?IP``bdTZd& z8*=EafkSV|p|=JOy&;F*8aVWZ9C~Zu&>M2-t${;t$f36e4!t3V-WoXch8%ip;Lsa# z=&gZ6Z^)sy1`fR;hu#`E^oAUIYv9lua_FsrLvP5Tw+0TqA&1@?IP``bdTZd&8*=Ea zfkSV|p|=JOy&;F*8aVWZ9C~Zu&>M2-y@5mT$f5TJ4!t9X-WxddjvRV#;LtmA=)HkM z@5rI|1`fR=hu#}F^o|^QZ{W~7a_GH*L+{9;_XZBVBZuA_IP{JjdT-#+J96l~fkW@e zq4x$3y(5R-8#wfi9C~ly&^vPIy@5mT$f5TJ4!t9X-WxddjvRV#;LtmA=)HkM@5rI| z1`fR=hu#}F^o|^QZ{W~7a_GH*L+{9;_XZBVBZuA_IP{JjdT-#+J96l~fkW@eq4x$3 zy(5R-8#wfi9C~ly&^vPIy@5mT$f5TJ4!t9X-WxddjvRV#;LtmA=)HkM@5rI|1`fR= zhu#}F^o|^QZ{W~7a_GH*L+{9;_XZBVBZuA_IP{JjdT-#+J96l~fkW@eq4x$3y(5R- z8#wfi9C~ly&^vPIy@5mT$f5TJ4!t9X-WxddjvRV#;LtmA=)HkM@5rI|1`fR=hu#}F z^o|^QZ{W~7a_GH*L+{9;_XZBVBZuA_IP{JjdT-#+J96l~fkW@eq4x$3y(5R-8#wfi z9C~ly&^vPIy@5mjvWMF`ZjiZCB8NU3IP{4e z`fT9PCvxbsfkU6jq0a^meIkcG8#wfd9QthF&?j=}vw=gO$f3^$4t*krJ{vgni5&WD z;Ls;>=(B-CpU9!l1`d59hdvuP^obn$Y~au*a_F;xL!Zc@&jt>CB8NU3IP{4e`fT9P zCvxbsfkU6jq0a^meIkcG8#wfd9QthF&?j=}vw=gO$f3^$4t*krJ{vgni5&WD;Ls;> z=(B-CpU9!l1`d59hdvuP^obn$Y~au*a_F;xL!Zc@&jt>CB8NU3IP{4e`fT9PCvxbs zfkU6jq0a^meIkcG8#wfd9QthF&?j=}vw=gO$f3^$4t*krJ{vgni5&WD;Ls;>=(B-C zpU9!l1`d59hdvuP^obn$Y~au*a_F;xL!Zc@&jt>CB8NU3IP{4e`fT9PCvxbsfkU6j zq0a^meIkcG8#wfd9QthF&?j=}vw=gO$f3^$4t*krJ{vgni5&WD;Ls;>=(B-CpU9!l z1`d59hdvuP^obn$Y~awp?BTYK8)Poz&}RdOK9NJ84IKJJ4t+Lo=o2~g*}$PsN1BX75L!S*C`a}+WHgM<@IrQ1Up-<${X9I^m zkwc#i9Qs5KeKv6D6FKzRz@bm%&}RdOK9NJ84IKJJ4t+Lo=o2~g*}$PsN1BX75L!S*C`a}+WHgM<@IrQ1Up-<${X9I^mkwc#i z9Qs5KeKv6D6FKzRz@bm%&}RdOK9NJ84IKJJ4t+In=nFaY)xe=IkV9V$9Qr~I zeKm0C3pw=Fz@abX&{qS8zK}y-4IKJH4t+In=nFaY)xe=IkV9V$9Qr~IeKm0C z3pw=Fz@abX&{qS8zK}y-4IKJH4t+In=nFaY)xe=IkV9V$9Qr~IeKm0C3pw=F zz@abX&{qS8zK}y-4IKJH4t+In=nFaY)xe=IkV9V$9Qr~IeKm0C3pw=Fz@abX z&{qS8zK}y-4IKJH4t+In=wJ45TgMGD7jo#UfkR)&p|1uGeIbXw8aVWY9QtbD&=+#( ztARsb$f2(W4t*hqz8X06g&g{7;LsOx=&ON4U&x`a1`d58hrSv(^o1PyYT(cpa_Fmp zLtn_DuLcf%A&0&iIP`@a`fA|N7jo#UfkR)&p|1uGeIbXw8aVWY9QtbD&=+#(tARsb z$f2(W4t*hqz8X06g&g{7;LsOx=&ON4U&x`a1`d58hrSv(^o1PyYT(cpa_FmpLtn_D zuLcf%A&0&iIP`@a`fA|N7jo#UfkR)&p|1uGeIbW_HgM=Ca_DCRhkhc5el~FECvxa# z1BZShhkiD2=qGaMX9I_RB8Pr9aOfv;=w}0mej9D7jo!V1BZSghkiA1=ofP6R|AKBA%}i7aOf9u=vM=Wej$f` zHE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6R|AKBA%}i7aOf9u=vM=Wej$f` zHE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6R|AKBA%}i7aOf9u=vM=Wej$f` zHE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6R|AKBA%}i7aOf9u=vM=Wej$f` zHE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6R|AKBA%}i7aOf9u=vM=Wej$f` zHE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6R|AKBA%}i7aOf9u=vM=Wej$f` zHE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6R|AKBA%}i7aOf9u=vM=Wej$f` zHE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6R|AKBA%}i7aOf9u=vM=Wej$f` zHE`$`a_CnBhyG;`w{_egb0LR*HE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6 zR|AKBA%}i7aOf9u=vM=Wej$f`HE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6 zR|AKBA%}i7aOf9u=vM=Wej$f`HE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6 zR|AKBA%}i7aOf9u=vM=Wej$f`HE`$`a_CnBhkhZ4el>9D7jo!V1BZSghkiA1=ofP6 zR|AKBA%}i7aOf9u=vM=Wej$f`HE`$`a_CnBhkhZ4em8LFH*)BA1BZSihkiG3=r?lc zcLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6em8LFH*)BA1BZSihkiG3=r?lc 
zcLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6em8LFH*)BA1BZSihkiG3=r?lc zcLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6em8LFH*)BA1BZSihkiG3=r?lc zcLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6em8LFH*)BA1BZSihkiG3=r?lc zcLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6em8LFH*)BA1BZSihkiG3=r?lc zcLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6em8LFH*)BA1BZSihkiG3=r?lc zcLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6em8LFH*)BA1BZSihkiG3=r?lc zcLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6em8LFU-ocY#|<(Ua_Dyhhkhf6 zem8LFH*)BA1BZSihkiG3=r?lccLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6 zem8LFH*)BA1BZSihkiG3=r?lccLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6 zem8LFH*)BA1BZSihkiG3=r?lccLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6 zem8LFH*)BA1BZSihkiG3=r?lccLRrhBZq!BaOgL3=ywB$ej|r|H*n}Ta_Dyhhkhf6 zem8LFH*x6gPT!$_(8FyV%V(|!J>1r@eCB%4!)+bQXRZf5+}5#t=6cY>Z5_*Jt_MBb z*0Fr%deFmd9m{8~2R+=@v3%xw(8FyV%V(|!J>1l>zC-_@hub=CkhzFMe^fWfT*RS2 zsvBf3;?N(}4Kf#T=#T0KnTt5|M|FeDMI8F0xz(eG8b{^kLm`Qi#YU0 zb%V@B9QvcWLFOV3z1`_M^bdNtt>Xrni#YU0b%V@B9QvcWLFOV3{ZZW@a}kIBsBVzC zh(mu=H^^MXp+Bk{WG>>+AJq*q7jfv1>IRvMIP`X>@6bQ!;kJ$&WG>>+AJq*q7jfv1 z>IRvMIP^z#gUm%7`lGr*<{}RLQQaVO5r_V$ZjiZ%Lw{5^$XvvsKdKvKF5=MJoxVf= zpoiN!ZjiZ%Lw{5^$XvvsKdKvKF5=K1)eSNiap;fg2APXE^hb4r%tajfqq;%nA`bmg z-5_%jhyJK;khzFMZ+H3*{evEE>$pMYA`bmg-5_%jhyJK;khzFMe^fWfT*RS2svBf3 z;?N(}4Kf#T=#T0KnTt5|M|FeDMI8F0xz(eG8b{^kLm`Qi#YUlr|-}| z=;5}G8)Pox&>z(eG8b{^kLm`Qi#YU0b%V@B9QvcWLFOV3{ZZW@a}kIBsBVzCh(mu= zH^^MXp+Bk{WG>>++nv5c|DcE4I&P4;h(mu=H^^MXp+Bk{WG>>+AJq*q7jfv1>IRvM zIP^z#gUm%7`lGr*<{}RLQQaVO5r_V$ZjiZjn13C1gfkQu#Lq8fg^aDBcqk%&|kV8KjIP?QK^rL}8 zKafK|8aVU=IrO7}LqCv1KN>jn13C1gfkQu#Lq8fg^aDBcqk%&|kV8KjIP?QK^rL}8 zKafK|8aVU=IrO7}LqCv1KN>jn13C1gfkQu#Lq8fg^aDBcqk%&|kV8KjIP?QK^rL}8 zKafK|8aVU=IrO7}LqCv1KN>jn13C1gfkQu#Lq8fg^aDBcqk%&|kV8KjIP?QK^rL}8 zKafK|8aVU=IrO7}LqCv1KN>jn13C1gfkQu#Lq8fg^aDBcqk%&|kV8KjIP?QK^rL}8 zKafK|8aVU=IrO7}LqCv1KN>jn13C1gfkQu#Lq8fg^aDBcqk%&|kV8KjIP?QK^rL}8 zKafK|8aVU=IrO7}LqCv1KN>jn13C1gfkQu#Lq8fg^aDBcqk%&|kV8KjIP?QK^rL}8 zKafK|8aVU=IrO7}LqCv1KN>jn13C1gfkQu#Lq8fg^e=n3t>Xrn3pwjn13C1gfkQu#Lq8fg z^aDBcqk%&|kV8KjIP?QK^rL}8KafK|8aVU=IrO7}LqCv1KN>jn13C1gfkQu#Lq8fg z^aDBcqk%&|kV8KjIP?QK^rL}8KafK|8aVU=IrO7}LqCv1KN>jn13C1gfkQu#Lq8fg z^aDBcqk%&|kV8KjIP?QK^rL}8KafK|8aVU=IrO7}LqCv1KN>jn13C1gfkQu#Lq8fg z^aDBc*1(}RM2-t${;t$f36e4!t3V-WoXch8%ip;Lsa#=&gZ6Z^)sy z1`fR;hu#`E^oAUIYv9lua_FsrLvP5Tw+0TqA&1@?IP``bdTZd&8*=EafkSV|p|=JO zy&;F*8aVWZ9C~Zu&>M2-t${;t$f36e4!t3V-WoXch8%ip;Lsa#=&gZ6Z^)sy1`fR; zhu#`E^oAUIYv9lua_FsrLvP5Tw+0TqA&1@?IP``bdTZd&8*=EafkSV|p|=JOy&;F* z8aVWZ9C~ly&^vPIy@5mT$f5TJ4!t9X-WxddjvRV#;LtmA=)HkM@5rI|1`fR=hu#}F z^o|^QZ{W~7a_GH*L+{9;_XZBVBZuA_IP{JjdT-#+J96l~fkW@eq4x$3y(5R-8#wfi z9C~ly&^vPIy@5mT$f5TJ4!t9X-WxddjvRV#;LtmA=)HkM@5rI|1`fR=hu#}F^o|^Q zZ{W~7a_GH*L+{9;_XZBVBZuA_IP{JjdT-#+J96l~fkW@eq4x$3y(5R-8#wfi9C~ly z&^vPIy@5mT$f5TJ4!t9X-WxddjvRV#;LtmA=)HkM@5rI|1`fR=hu#}F^o|^QZ{W~7 za_GH*L+{9;_XZBVBZuA_IP{JjdT-#+J96l~fkW@eq4x$3y(5R-8#wfi9C~ly&^vPI zy@5mT$f5TJ4!t9X-WxddjvRV#;LtmA=)HkM@5rI|1`fR=hu#}F^o|^QZ{W~7a_GH* zL+{9;_XZBVBZuA_IP{JjdT-#+J96l~fkW@eq4x$3y(5R-8#wfi9C~ly(7){AwvHQQ zF67XA1Bc#`L+=e7dPfeuH*n}3IrQGZp?Boadjp5wkwfnd9C}9%y*F^^9Xa&gz@c~K z(0c=i-jPG^4IFw$4!t*U=p8xq-oT-ECB8NU3IP{4e`fT9PCvxbsfkU6j zq0a^meIkcG8#wfd9QthF&?j=}vw=gO$f3^$4t*krJ{vgni5&WD;Ls;>=(B-CpU9!l z1`d59hdvuP^obn$Y~au*a_F;xL!Zc@&jt>CB8NU3IP{4e`fT9PCvxbsfkU6jq0a^m zeIkcG8#wfd9QthF&?j=}vw=gO$f3^$4t*krJ{vgni5&WD;Ls;>=(B-CpU9!l1`d59 zhdvuP^obn$Y~au*a_F;xL!Zc@&jt>CB8NU3IP{4e`fT9PCvxbsfkU6jq0a^meIkcG z8#wfd9QthF&?j=}vw=gO$f3^$4t*krJ{vgni5&WD;Ls;>=(B-CpU9!l1`d59hdvuP z^obn$Y~au*a_F;xL!Zc@&jt>CB8NU3IP{4e`fT9PCvxbsfkU6jq0a^meIkcG8#wfd 
z9QthF&?j=}vw=gO$f3^$4t*krJ{vgni5&WD;Ls;>=(B-CpU9!l1`d59hdvuP^e=n3 zt>Xrn3pw=Jz@bm%&}RdOK9NJ84IKJJ4t+Lo=o2~g*}$PsN1BX75L!S*C`a}+WHgM<@IrQ1Up-<${X9I^mkwc#i9Qs5KeKv6D z6FKzRz@bm%&}RdOK9NJ84IKJJ4t+Lo=o2~g*}$PsN1BX75L!S*C`a}+WHgM<@IrQ1Up-<${X9I^mkwc#i9Qs5KeKv6D6FKzR zz@bm%&}RdOK9NIT4IKJH4t+In=nFaY)xe=IkV9V$9Qr~IeKm0C3pw=Fz@abX z&{qS8zK}y-4IKJH4t+In=nFaY)xe=IkV9V$9Qr~IeKm0C3pw=Fz@abX&{qS8 zzK}y-4IKJH4t+In=nFaY)xe=IkV9V$9Qr~IeKm0C3pw=Fz@abX&{qS8zK}y- z4IKJH4t+In=nFaY)xe=IkV9V$9Qr~IeKm0C3pw=Fz@abX&{qS8zK}y-4IKKH zJ>1rDgUp2-`fA|N7jo#UfkR)&p|1uGeIbXw8aVWY9QtbD&=+#(tARsb$f2(W4t*hq zz8X06g&g{7;LsOx=&ON4U&x`a1`d58hrSv(^o1PyYT(cpa_FmpLtn_DuLcf%A&0&i zIP`@a`fA|N7jo#UfkR)&p|1uGeIbXw8aVWY9QtbD(0|>-mz{m|`G+r_z5B_hAHVd$@781wFn&w2PMuYAUregFD5zxu~t{q9%)7oYrH|19JFJFvfM>BEoz@P}v5|M0V){O|ws#h0= zv+q1)-hcMd)90T(`}q08|9O1&)8~J9@ozu-$v!=i|9U@s`tg5!cKY9c`r!H=hu80( z{QmO~pM3h8kN@N0@%7%n9Y5n|w|~}`|KPK~`G0qR^WMkLUp#sC{OPCf{NK+$e*WW} z6URn>_?_p&`~J~8FE`|qC(qx1^4UATI6m|5KRw3u)f43Fz4|?O+;Qg2Z`pJ2ef;6) zA3gu^cizvx_&ML_wEy;QUm4CXCYJ9zt)L(M<-oo8@WmPro>( zy?_3v-@e{a`Q6k1eDR}=$So=Qqo+@ve>E(>_|3om-M{?9-~H~F|M=^lzur;#`%~9H z^sw9#wLkm(qc?0+Zi(X`J^9Z!Y*cQE>>oXQ)6Sq9Imx_9BXi4I?}Lfve?EKu{%=2d`t<$I zV{#K6{BnwUGsfhmsQb<2@@9<5Ejjn)wDM+*$t@}O<#_z!*{8>+Kl`y3Vg75}B!X4x z*XkEPu1l~I{aVf9$8`yw_`g=C_;Fo=r~I$gCVpI(;K}}L^@tzWC3%|vI-lh~ZpDc_ z!GE34?;qDCd20VUpV>dIOY)@tbv~zmT$kkO{Of!+|F|y66M5O}!}MU;F3H=Dve^eI z!Lr#0$-uJNhiJgB)dGIpb0v^{UaSB6xGq8Vd9CK}WS`e+^**jkkbPdO&-=J8LH2pAChz0k9s$|s zwK}|y>k?$2*J|%Ru1k=8UaPnJxGq8Vd9B9okQILIJtB3ozEk?$2*J|B9u1k=8UaN2WxGq8Vd99}He4>0OOSnDt3~^`Ek?$2*XqnZu1k=8 zUaKwpxGq8Vd99x8k?$2*J`{zu1k=8UaRZ+xGq8Vd99Y~ zW?6YOF&j#6N%VwVqvd@;y zJ{x48Et`Ed$Ua*(`)rVXwruv|9R*F(1QrjLzjRa#DE^U2=pKd z^w4FX2XUZ>E(AS@1U+;q=s_&#p^HHeqCpQ`4tfv|dgy}CgNV>WmxLa~gdVym^dKtq z&}E?qaiNDU3_XYpJ#=a4L2T%ui$f2hLl0dZdJrFa=mOD$2+>2Ah#tg<9=b^MAWHPm zWugahqK7UNJ%|)NbgAe;tmvVOMGvAy4_z*L5HEV@g3*JB(Lk0ux@hzuYV^=$ zqX%)Lhb|mFh#Wn1>F7c1=%I^8528m8T|RmcKYHi_(t`-nLzj>q#E>4ki1Z+e^w4Fb z2XUl_E+jpOBt3K~=|L>%p^HfmqDc>3PI?eedgy}EgNV{Ymy{mFlpeaM^dPGA&}F3u zaixbYEIo)UJ#=a5L2T)vi%SopOAlRMdJtcF=mOJ&2-8ECm>$HK9=gc%Aj9>km;y6E&E>h#cMrw4JT zhb}xlh&(-X>FGi2>7k2H528;GU4D8He|qQw)Po4rLzkc)#GoF!2=yQe_0VOg2XUx} zE<`Om~(p^H%uqEQcBj(QM}dgy}GgNW2am!uxVq#n8`^&l$s&}FFyajAzc zOg)H9J#=a6L2T-wi&GDxQx9F9dJv!5bbV?gKDFuk)JA-2)Agy1_|&HBQycNAP1mP3 z;!~ThPi@4fHeH|Eh)-?0KD7~_+H`$tBR;k1`qV~zYSZr)%?sZH0XHsVv8 zu1{^mr#4-m+K5kWx<0iLpW1YNY9l_i>H5@0d}`D6sg3y5rt4E1@u^MMr#9kKo32l7 z#HTi0pW28|ZMr_S5ue(0eQF~r)%?sZH0XHsVv8u1{^mr#4-m+K5kW zx<0iLpW1YNY9l_i>H5@0d}`D6sg3y5rt4E1@u^MMr#9kKo32l7#HTi0pW28|ZMr_S z5ue(0eQF~r)%?sZH0XHsVv8u1{^mr#4-m+K5kWx<0iLpW1YNY9l_i z>H5@0d}`D6sg3y5rt4E1@u^MMr#9kKo32l7#HTi0pW28|ZMr_S5ue(0eQF~Pwm8~c3q#^iBIjiKD85{+I4+uCqA|7`qWN*YS;Ct zo%qzQ>r*@Nsa@BncH&dJu21d6r*>VR+KEr?x<0iNpW1bOY9~Im>-yAAd}`PAsh#-L zuIp1f@u^+cr*`5~yRJ{|#HV&$pW2B}?Ycg-6QA03eQGB@wd?xSPJC+D^{Jis)UNAO zJMpPq*Qa*kQ@gHD?Zl^cU7y;CPwl!swG*G(b$x0lKDF!m)J}YA*Y&BL_|&fJQ#Pwm8~c3q#^iBIjiKD85{+I4+uCqA|7`qWN*YS;Cto%qzQ>r*@Nsa@Bn zcH&dJu21d6r*>VR+KEr?x<0iNpW1bOY9~Im>-yAAd}`PAsh#-LuIp1f@u^+cr*`5~ zyRJ{|#HV&$pW2B}?Ycg-6QA03eQGB@wd?xSPJC+D^{Jis)UNAOJMpPq*Qa*kQ@gHD z?Zl^cU7y;CPwl!swG*G(b$x0lKDF!m)J}YA*Y&BL_|&fJQ#Pwm8~ zc3q#^iBIjiKD85{+I4+uCqA|7`qWN*YS;Cto%qzQ>r*@Nsa@BncH&dJu21d6r*>VR z+KEr?x<0iNpW1bOY9~Im>-yAAd}`PAsh#-LuIp1f@u^+cr*`5~yRJ{|#HV&$pW2B} z?Ycg-6QA03eQGB@wd?xSPJC+D^{Jis)UNAOJMpPq*Qa*kQ@gHD?Zl^cU7y;CPwl!s zwG*G(b$x0lKDF!m)J}YA*Y&BL_|&fJQ#Pwm8~c3q#^iBIjiKD85{ zI&^*NAU<{I`qV*u>d^J6gZR{;>r)5usYBPN4&qaXu1_7rrw(18I*3mlx;}LfpE`7X z>L5OK==#(_eCp8kse|~`q3crz@u@@Crw-y%hptZ@#HS8jpE`(79lAbs5T819ed-`S 
zb?ExkL44}a^{IpS)S>HB2l1&x*QXBRQ-`ik9mJ;&U7tFLPaV2Gbr7FAbbaa|K6U8& z)Ioge(DkW<_|&26QwQ;>L)WJc;!}sNPaVXk4qcx*h)*55K6MbEI&^*NAU<{I`qV*u z>d^J6gZR{;>r)5usYBPN4&qaXu1_7rrw(18I*3mlx;}LfpE`7X>L5OK==#(_eCp8k zse|~`q3crz@u@@Crw-y%hptZ@#HS8jpE`(79lAbs5T819ed-`Sb?ExkL44}a^{IpS z)S>HB2l1&x*QXBRQ-`ik9mJ;&U7tFLPaV2Gbr7FAbbaa|K6U8&)Ioge(DkW<_|&26 zQwQ;>L)WJc;!}sNPaVXk4qcx*h)*55K6MbEI&^*NAU<{I`qV*u>d^J6gZR{;>r)5u zsYBPN4&qaXu1_7rrw(18I*3mlx;}LfpE`7X>L5OK==#(_eCp8kse|~`q3crz@u@@C zrw-y%hptZ@#HS8jpE`(79lAbs5T819ed-`Sb?ExkL44}a^{IpS)S>HB2l1&x*QXBR zQ-`ik9mJ;&U7tFLPaV2Gbr7FAbbaa|K6U8&)Ioge(DkW<_|&26QwQ;>L)WJc;!}sN zPaVXk4qcx*h)*55K6MbEI&^*NAU<{I`qV*u>d^J6gZR{;>r)5usZ-adPU2Iiu1}rB zr%qj;I*Cu6x;}LhpE`AY>LfmO>iX14eCpKosgwBBsq0fG@u^eSr%vKir>;+(#HUVO zpE`+8ow`1C5}!JCed;7Wb?W-mNqp+m^{JEi)T!%JC-JFM*QZY6Q>U&^oy4b3U7tFM zPo26xbrPRCb$#k2K6UE))Jc5m)b**8_|&QEQz!AMQ`e_X;!~%tPo2c4PFOBtCWO`qW8$>eThAllauB>r*H3sZ-adPU2Iiu1}rBr%qj;I*Cu6x;}Lh zpE`AY>LfmO>iX14eCpKosgwBBsq0fG@u^eSr%vKir>;+(#HUVOpE`+8ow`1C5}!JC zed;7Wb?W-mNqp+m^{JEi)T!%JC-JFM*QZY6Q>U&^oy4b3U7tFMPo26xbrPRCb$#k2 zK6UE))Jc5m)b**8_|&QEQz!AMQ`e_X;!~%tPo2c4PFOBtCWO z`qW8$>eThAllauB>r*H3sZ-adPU2Iiu1}rBr%qj;I*Cu6x;}LhpE`AY>LfmO>iX14 zeCpKosgwBBsq0fG@u^eSr%vKir>;+(#HUVOpE`+8ow`1C5}!JCed;7Wb?W-mNqp+m z^{JEi)T!%JC-JFM*QZY6Q>U&^oy4b3U7tFMPo26xbrPRCb$#k2K6UE))Jc5m)b**8 z_|&QEQz!AMQ`e_X;!~%tPo2c4PFOBtCWO`qW8$>eThAllauB z>r*H3sZ-adPU2Iiu1}rBr%qj;I*Cu6x;}LhpSpB?>LNaM>H5?~eCpEmsf+m3rR!4{ z@u^GKr!L}Cm#$A;#HTJ@pSp-oUAjJX5udtred;1Ub?N%lMSSYg^{I>a)TQfF7xAe} z*QYMxQeBV8i}=)~>r)r;sY}LNaM>H5?~eCpEmsf+m3rR!4{@u^GKr!L}Cm#$A; z#HTJ@pSp-oUAjJX5udtred;1Ub?N%lMSSYg^{I>a)TQfF7xAe}*QYMxQeBV8i}=)~>r)r;sY}LNaM>H5?~eCpEmsf+m3rR!4{@u^GKr!L}Cm#$A;#HTJ@pSp-oUAjJX z5udtred;1Ub?N%lMSSYg^{I>a)TQfF7xAe}*QYMxQeBV8i}=)~>r)r;sY}LNaM z>H5?~eCpEmsf+m3rR!4{@u{b-Pd$lGJ#~HRNqp+5>r+qSQ%_x=dJ>;{>iX1^_|#L^ zr=G;8p1MBuBtG@j^{FTEsi&?_J&8{}b$#kdeCny|Q%~YkPhFpS5}$hN`qY#7)Kk}| zp2Vk~x<2(JKK0b~sVDKNr>;*uiBCOsedZ$8fPvTQgU7vaqpL**0)RXwsQ`e`S z#HXIRKJ_F%_0;vLC-JGLu1`IQPd#;g>PdX+sq0fu;!{supL!CXdg}VrllatA*QcJu zr=Ge#^&~#^)b*(+@u{b-Pd$lGJ#~HRNqp+5>r+qSQ%_x=dJ>;{>iX1^_|#L^r=G;8 zp1MBuBtG@j^{FTEsi&?_J&8{}b$#kdeCny|Q%~YkPhFpS5}$hN`qY#7)Kk}|p2Vk~ zx<2(JKK0b~sVDKNr>;*uiBCOsedZ$8fPvTQgU7vaqpL**0)RXwsQ`e`S#HXIR zKJ_F%_0;vLC-JGLu1`IQPd#;g>PdX+sq0fu;!{supL!CXdg}VrllatA*QcJur=Ge# z^&~#^)b*(+@u{b-Pd$lGJ#~HRNqp+5>r+qSQ%_x=dJ>;{>iX1^_|#L^r=G;8p1MBu zBtG@j^{FTEsi&?_J&8{}b$#kdeCny|Q%~YkPhFpS5}$hN`qY#7)Kk}|p2Vk~x<2(J zKK0b~sVDKNr>;*uiBCOsedZ$8fPvTQgU7vaqpL**0)RXwsQ`e`S#HXIRKJ_F% z_0;vLC-JGLu1`IQPd#;g>PdX+sq0fu;!{supL!CXdg}VrllatA*QcJur=Ge#^&~#^ z)b*(+@u{b-Pd$lGJ#~HRNqp+5>r+qSQ%_x=dJ>;{>H5@*_|!|+r(VRTUb;T@B0lxf z^{E%}sh6%#y@*e}bbabYeCnm^Q!nCEFI}H{5ubYL`qYc~)JxZ=Uc{$fx<2(HKK0V| zsTc97m#$B}h)=zAedZR*bFXB@#U7vappL*%~)QkAkOV_7f#HU`mKJ_9#_0sjJ z7xAf=u1~#)PrY<~>P39&rR!5K;!`hOpL!9Wdg=Pqi}=(_*QZ{@r(U`~^&&p?()Fnq z@u`=tPrZmwy>xx*MSSX|>r*e{Q!ibgdJ&&`>H5@*_|!|+r(VRTUb;T@B0lxf^{E%} zsh6%#y@*e}bbabYeCnm^Q!nCEFI}H{5ubYL`qYc~)JxZ=Uc{$fx<2(HKK0V|sTc97 zm#$B}h)=zAedZR*bFXB@#U7vappL*%~)QkAkOV_7f#HU`mKJ_9#_0sjJ7xAf= zu1~#)PrY<~>P39&rR!5K;!`hOpL!9Wdg=Pqi}=(_*QZ{@r(U`~^&&p?()Fnq@u`=t zPrZmwy>xx*MSSX|>r*e{Q!ibgdJ&&`>H5@*_|!|+r(VRTUb;T@B0lxf^{E%}sh6%# zy@*e}bbabYeCnm^Q!nCEFI}H{5ubYL`qYc~)JxZ=Uc{$fx<2(HKK0V|sTc97m#$B} zh)=zAedZR*bFXB@#U7vappL*%~)QkAkOV_7f#HU`mKJ_9#_0sjJ7xAf=u1~#) zPrY<~>P39&rR!5K;!`hOpL!9Wdg=Pqi}=(_*QZ{@r(U`~^&&p?()Fnq@u`=tPrZmw zy>xx*MSSX|>r*e{Q!ibgdJ&&`>H5@*_|!|+r(VRTUb;T@B0lxf^{E%}skg3Ay@^k~ zb$#kheCn<1Q*Yu^Z(W~y6Q6qP`qZ2F)LYl5-o&Tgx<2(LKK0i1sWaFWjZ{kyLU7varpL*;1)SLL!Ti2)F#HZf6KJ_L(_15*NH}R>ru1~#*PrY@0 
z>P>v=t?N^7;!|&3pL!FYdh7bsoA}gQ*QegZr{20g^(H>`*7d13@u|12PrZpxy>)%+ zO?>LD>r-#yQ*T|LdJ~^|>-yB2_|#k1r{2V;-nu^ZCO-An^{F@Uskg3Ay@^k~b$#kh zeCn<1Q*Yu^Z(W~y6Q6qP`qZ2F)LYl5-o&Tgx<2(LKK0i1sWaFWjZ{kyLU7varpL*;1)SLL!Ti2)F#HZf6KJ_L(_15*NH}R>ru1~#*PrY@0>P>v= zt?N^7;!|&3pL!FYdh7bsoA}gQ*QegZr{20g^(H>`*7d13@u|12PrZpxy>)%+O?>LD z>r-#yQ*T|LdJ~^|>-yB2_|#k1r{2V;-nu^ZCO-An^{F@Uskg3Ay@^k~b$#kheCn<1 zQ*Yu^Z(W~y6Q6qP`qZ2F)LYl5-o&Tgx<2(LKK0i1sWaFWj zZ{kyLU7varpL*;1)SLL!Ti2)F#HZf6KJ_L(_15*NH}R>ru1~#*PrY@0>P>v=t?N^7 z;!|&3pL!FYdh7bsoA}gQ*QegZr{20g^(H>`*7d13@u|12PrZpxy>)%+O?>LD>r-#y zQ*T|LdJ~^|>-yB2_|#k1r{2V;-nu^ZCO-An^{F@Uskg3Ay@^k~b$#kheCn<1Q*Yu^ zZ(W~y6Q6qA=>pXcw*9tFWh2ppZNIHk*--Rg+i&YsHWoeD_S-s@4Mq>P{kBeJqtSzH zzpYc*aP(l?Z|hVx9zEFh+d7pENDsFCwoYXu(t~ZksZ(9F`oXr})~RqtB5w6jEu4{v zTzymvXCz`*AJxJciRjfwwQxove)Ul;oRNrNeN+o)Bw|<})xsHxDAq@{a7H4I^|;f8 ztRHOqZJi2dBw|?~)xsHxXx2xya7H4Y^-(RHk%(x0R10S$Vp<>7!WoIE)AJxJciD=hHwQxov-u1ZC1+O1$`)!>HXCz`?AJxJciKy2{ zwQxov?)6bEoRNrpeN+o)Bw}A5)xsHx=+{TJa7H5j^-(RHk%)kOR10S$VqlLuT?G5V zw%^vNa7H2y_E9aIk%)wSR10S$VqqWE!WoHZ*hjT+Mj{^eQ7xR2h=_eu3uh!^VjtDQ z8HuRaN40Q9A};p0(}l4gZ2N7U3TGr@V;|MR8HwoFN40Q9B0ly}Eu4{vkbP7OXCz`| zAJxJci744ewQxovPWDkPoRNr>eN+o)Bw}TcJ6$aM!M5Mlsc=RjUiMKfoRNr_eN+o) zBw}VC)xsHxsM$xga7H3-_E9aIk%*jqR10S$VrL)K!WoI^*+;c-Mk0RpxYGr+A8h+= zoeF0pVrU=L!WoGu+DEl;Mk0>(Q7xR2h@^c~3uh!^X&=?X8Hs4xN40Q9BA)h9Eu4{v zsC`rmXCz{3k2_sd`@y!~)~RqtBChsPEu4{vtbJ4qXCz{4AJxJciRju#wQxovzV=Zq zoRNsIeN+o)Bw}nI)xsHxDBDN1a7IGT_7HZq2XeNDu(Lgovps~J?SY)_A?$1q9j>}(I@Y!6{) zdmv|f2s_&YIom_n*&fK*9>UJ{K+g6McD4s{wui8@J&?0Kgq`hyob4g(Y!BpY4`F9} zAZL3BJKF;}+e6sd9?01q!p`9j>}(I@Y!6{)dmv|f z2s_&YIom_n*&fK*9>UJ{K+g6McD4s{wui8@J&?0Kgq`hyob4g(Y!BpY4`F9}AZL3B zJKF;}+e6sd9?01q!p`9j>}(I@Y!6{)dmv|f2s_&Y zIom_n*&fK*9>UJ{K+g6McD4s{wui8@J&?0Kgq`hyob4g(Y!BpY4`F9}AZL3BJKF;} z+e6sd9?01q!p`}(rywoTaCHsoxZu(NH***0Nk+mN$u!p^oK zXWN9GZ9~qs2|L?{oNW_!whcMkChTk*a<)y_**4^Co3OKO$k{ewXWNjoZNkpBA!pl! 
zooz$Twh24ihMa8^cD4;U+a~O68*;Wy*x5GZY@4vNZOGX+VQ1Trvu(o8wjpQRgq>|e z&bA3V+lHKN6Lz)@Iol@eY#Va6P1xBs}(rywoTaCHsoxZu(NH***0Nk+mN$u!p^oKXWN9GZ9~qs2|L?{ zoNW_!whcMkChTk*a<)y_**4^Co3OKO$k{ewXWNjoZNkpBA!pl!ooz$Twh24ihMa8^ zcD4;U+a~O68*;Wy*x5GZY@4vNZOGX+VQ1Trvu(o8wjpQRgq>|e&bA3V+lHKN6Lz)@ zIol@eY#Va6P1xBs}(rywoTaCHsoxZu(NH***0Nk+mN$u!p^oKXWN9GZ9~qs2|L?{oNW_!whcMkChTk* za<)y_**4^Co3OKO$k{ewXWNjoZNkpBA!pl!ooz$Twh24ihMa8^cD4;U+a~O68*;Wy z*x5GZY@4vNZOGX+VQ1Trvu(o8wjpQRgq>|e&bA3V+lHKN6Lz)@Iol@eY#Va6P1xBs z})%7wq4lScI0fk zu(R#R*>+)P+mW;F!p^oMXWNCHZAZ?w3p?A6oNX6&wjDX!F6?YOa<*OA*>>b?yRftE z$k}#bXWNmp?ZVEsBWK%%ooz?XwhKGkj+|{5cD5Zk+b-;EJ94&N*x7dEY`d_t?a0}7 zVQ1Tsv+cspwj*cTg`I6j&bA9X+m4)V7k0KCIomGmY&&wcUD(-n&bA|G+l8HNN6xkjJKK(&Z5MX79XZ=B>})%7wq4lScI0fku(R#R*>+)P+mW;F z!p^oMXWNCHZAZ?w3p?A6oNX6&wjDX!F6?YOa<*OA*>>b?yRftE$k}#bXWNmp?ZVEs zBWK%%ooz?XwhKGkj+|{5cD5Zk+b-;EJ94&N*x7dEY`d_t?a0}7VQ1Tsv+cspwj*cT zg`I6j&bA9X+m4)V7k0KCIomGmY&&wcUD(-n&bA|G+l8HN zN6xkjJKK(&Z5MX79XZ=B>})%7wq4lScI0fku(R#R*>+)P+mW;F!p^oMXWNCHZAZ?w z3p?A6oNX6&wjDX!F6?YOa<*OA*>>b?yRftE$k}#bXWNmp?ZVEsBWK%%ooz?XwhKGk zj+|{5cD5Zk+b-;EJ94&N*x7dEY`d_t?a0}7VQ1Tsv+cspwj*cTg`I6j&bA9X+m4)V z7k0KCIomGmY&&wcUD(-n&bA|G+l8HNN6xkjJKK(&Z5MX7 z9XZ=B>})%7wq4lScI0e_u(KV=*$!c6JCL&-!p?ReXFG(Q?Lf|U2s_(>ob3>HwgWla zA?$1ia<)U**$(7vhp@99$k`5IXFHIy9m39bAZI&-o$Wx*b_hG$ft>9ScD4gK+ac_1 z2XeMU*x3%`Y=^M39mv@ZVP`v#vmL_Db|7avgq`g`&UOep+ku?z5O%f$Iol!ZYzK0- zL)h64K+bjuJKKSr?GSdh13B9v>}&^ewnNz2 z4&-cyu(KV=*$!c6JCL&-!p?ReXFG(Q?Lf|U2s_(>ob3>HwgWlaA?$1ia<)U**$(7v zhp@99$k`5IXFHIy9m39bAZI&-o$Wx*b_hG$ft>9ScD4gK+ac_12XeMU*x3%`Y=^M3 z9mv@ZVP`v#vmL_Db|7avgq`g`&UOep+ku?z5O%f$Iol!ZYzK0-L)h64K+bjuJKKSr?GSdh13B9v>}&^ewnNz24&-cyu(KV=*$!c6 zJCL&-!p?ReXFG(Q?Lf|U2s_(>ob3>HwgWlaA?$1ia<)U**$(7vhp@99$k`5IXFHIy z9m39bAZI&-o$Wx*b_hG$ft>9ScD4gK+ac_12XeMU*x3%`Y=^M39mv@ZVP`v#vmL_D zb|7avgq`g`&UOep+ku?z5O%f$Iol!ZYzK0-L)h64K+bjuJKKSr?GSdh13B9v>}&^ewnNz24&-cyu(KV=*$!c6JCL&-!p?ReXFG(Q z?Lf|U3On10ob42Lwi7wqDeP<~a<)_0*-qqar?9h~$k|R|XFHLzox;v`B4;~=o$W-< zb_zS&iJa{ecD55a+bQg9Cvvt^*x63xY^SiZoygfvVP`v$vz@}ub|Pmxg`Mq0&UOkr z+lid*6n3@~Iom1hY$tNIQ`p%~})4;wo};IPULK-u(O@W*-l|+JCU=U!p?RgXFG+R?L^LY3On10ob42L zwi7wqDeP<~a<)_0*-qqar?9h~$k|R|XFHLzox;v`B4;~=o$W-})4; zwo};IPULK-u(O@W*-l|+JCU=U!p?RgXFG+R?L^LY3On10ob42Lwi7wqDeP<~a<)_0 z*-qqar?9h~$k|R|XFHLzox;v`B4;~=o$W-})4;wo};IPULK-u(O@W z*-l|+JCU=U!p?RgXFG+R?L^LY3On10ob42Lwi7wqDeP<~a<)_0*-qqam$0*4$k{Go zXSI57jm{s*x4@RY?rXJUC7xkVQ0IL zvt7c@b|GiGgq`g|&UOhq+l8F%5_Yx=Iol=dY!`C2OW4^i}(fuwoBOAF63;Nu(Msr*)CycyO6V8!p?Rf zXS;-*?Ly9W2|L?`ob3{JwhKAiCG2b$a<)s@*)HU4m$0*4$k{GoXSI57jm{s*x4@RY?rXJUC7xkVQ0ILvt7c@b|GiGgq`g| z&UOhq+l8F%5_Yx=Iol=dY!`C2OW4^i}(fuwoBOAF63;Nu(Msr*)CycyO6V8!p?RfXS;-*?Ly9W2|L?` zob3{JwhKAiCG2b$a<)s@*)HU4m$0*4$k{GoXSI57jm{s*x4@RY?rXJUC7xkVQ0ILvt7c@b|GiGgq`g|&UOhq+l8F%5_Yx= zIol=dY!`C2OW4^i}(fuwoBOAF63;Nu(Msr*)CycyO6V8!p?RfXS;-*?Ly9W2|L?`ob3{JwhKAiCG2b$ za<)s@*)HU4m$0*4$k{GoXS}*fuY)@fldm?9h3Om~qIong%*`CPRp2E)dM9%gUcD5&Swx_VOJ(06Lg`Mq*ob4&> zY)|BDPhn?!B4>LFJKGaE+f&%tp2*pr!p`q4Q`p&_$l0F4&h|vk_7rxuCvvu@u(Lgpvpt2K?TMW2DeP=d z}*fu zY)@fldm?9h3Om~qIong%*`CPRp2E)dM9%gUcD5&Swx_VOJ(06Lg`Mq*ob4&>Y)|BD zPhn?!B4>LFJKGaE+f&%tp2*pr!p`q4Q`p&_$l0F4&h|vk_7rxuCvvu@u(Lgpvpt2K?TMW2DeP=d}*fuY)@fl zdm?9h3Om~qIong%*`CPRp2E)dM9%gUcD5&Swx_VOJ(06Lg`Mq*ob4&>Y)|BDPhn?! 
zB4>LFJKGaE+f&%tp2*pr!p`q4Q`p&_$l0F4&h|vk_7rxuCvvu@u(Lgpvpt2K?TMW2DeP=d})UOY%gJFdm(3g z2|L>hIonIv*^wij}?m$0+Fkh8sno$ZC3?IrANFXU`5VP|_GXL|`d z+Y33{OW4_7$k|@P&h|pi_7Zls7jm|ju(Q38v%Q3!?S-7}CG2c3})UOY%gJFdm(3g2|L>h zIonIv*^wij}?m$0+Fkh8sno$ZC3?IrANFXU`5VP|_GXL|`d+Y33{ zOW4_7$k|@P&h|pi_7Zls7jm|ju(Q38v%Q3!?S-7}CG2c3})UOY%gJFdm(3g2|L>hIonIv z*^wij}?m$0+Fkh8sno$ZC3?IrANFXU`5VP|_GXL|`d+Y33{OW4_7 z$k|@P&h|pi_7Zls7jm|ju(Q38v%Q3!?S-7}CG2c3}+r3Y;R#_dn0Fi3p?8zIon&<+1|+6 z-onoIM$YyYcD6Tiwzsgey^*uMg`Mq}+r3Y;R#_dn0Fi3p?8zIon&<+1|+6-onoI zM$YyYcD6Tiwzsgey^*uMg`Mq}+r3Y;R#_dn0Fi3p?8zIon&<+1|+6-onoIM$YyY zcD6Tiwzsgey^*uMg`Mq}+r1Y}=i#v;AP(Z|hVxBR$ym+d7raNDsFCwoYX;(t~Zk zty9^I^kCa>>r^%)J=pf!I+e{x54QcbPGvLFgKfX9Q`wC4VB2r&R5l|$*!G(`)pfQX zZ2N7U3TGtZY#-Ia8HqUCN40Q9BF^?vEu4{vvwc(xXC&fmAJxJci8$LwwQxov&h}9) zoRNsLeN+o)B;st_ovyR}VB2r&R5&9MXZxrY&Pc@BKB|Q?5^=VVYT=ATob97pI3p2f z`=}PqNW|Gbs)aKWakh_Y;fzF_?W0;aBN1oY?sT2)2ityIr@|SDINL|Ha7H4|_E9aI zk%+T>R10S$;%pz)!WoG;+efua;GZJyOk80tJM4auTS~w#S zXZxrY&Pc@BwmV&C`@y!~)~RqtBF^?vEu4{vvwc(xXC&fmAJxJci8$LwwQxov&h}9) zoRNsLeN+o)B;srz)xsHxINL|Ha7H4|w%zGE+Yh$=woZjJ5^=VVYT=ATob97pI3p2f z`=}PqNW|Gbs)aKWakh_Y;fzF_?W0;aBN1o&s20vh#MwTog)R10S$;%pz)!WoG;+efua;GZJyOk80tJM4auTS~w#S zXZxrY&Pc@BKB|Q?5^=VVYT=AToNc?)b+#XD`)!>HXC&fmAJxJci8$LwwQxov&h}9) zoRNsLeN+o)B;srz)xsHxINL|Ha7H4|_E9aIk%+T>R10S$9j>}(I@Y!6{)dmv|f2s_&YIom_n z*&fK*9>UJ{K+g6McD4s{wui8@J&?0Kgq`hyob4g(Y!BpY4`F9}AZL3BJKF;}+e6sd z9?01q!p`9j>}(I@Y!6{)dmv|f2s_&YIom_n*&fK* z9>UJ{K+g6McD4s{wui8@J&?0Kgq`hyob4g(Y!BpY4`F9}AZL3BJKF;}+e6sd9?01q z!p`9j>}(I@Y!6{)dmv|f2s_&YIom_n*&fK*9>UJ{ zK+g6McD4s{wui8@J&?0Kgq`hyob4g(Y!BpY4`F9}AZL3BJKF;}+e6sd9?01q!p`|e&bA3V+lHKN z6Lz)@Iol@eY#Va6P1xBs}(rywoTaCHsoxZu(NH***0Nk+mN$u!p^oKXWN9GZ9~qs2|L?{oNW_!whcMk zChTk*a<)y_**4^Co3OKO$k{ewXWNjoZNkpBA!pl!ooz$Twh24ihMa8^cD4;U+a~O6 z8*;Wy*x5GZY@4vNZOGX+VQ1Trvu(o8wjpQRgq>|e&bA3V+lHKN6Lz)@Iol@eY#Va6 zP1xBs}(rywoTaC zHsoxZu(NH***0Nk+mN$u!p^oKXWN9GZ9~qs2|L?{oNW_!whcMkChTk*a<)y_**4^C zo3OKO$k{ewXWNjoZNkpBA!pl!ooz$Twh24ihMa8^cD4;U+a~O68*;Wy*x5GZY@4vN zZOGX+VQ1Trvu(o8wjpQRgq>|e&bA3V+lHKN6Lz)@Iol@eY#Va6P1xBs}(rywoTaCHsoxZu(NH***0Nk z+mN$u!p^oKXWN9GZ9~qs2|L?{oNW_!whcMkChTk*a<)y_**4^CyRftE$k}#bXWNmp z?ZVEsBWK%%ooz?XwhKGkj+|{5cD5Zk+b-;EJ94&N*x7dEY`d_t?a0}7VQ1Tsv+csp zwj*cTg`I6j&bA9X+m4)V7k0KCIomGmY&&wcUD(-n&bA|G z+l8HNN6xkjJKK(&Z5MX79XZ=B>})%7wq4lScI0fku(R#R*>+)P+mW;F!p^oMXWNCH zZAZ?w3p?A6oNX6&wjDX!F6?YOa<*OA*>>b?yRftE$k}#bXWNmp?ZVEsBWK%%ooz?X zwhKGkj+|{5cD5Zk+b-;EJ94&N*x7dEY`d_t?a0}7VQ1Tsv+cspwj*cTg`I6j&bA9X z+m4)V7k0KCIomGmY&&wcUD(-n&bA|G+l8HNN6xkjJKK(& zZ5MX79XZ=B>})%7wq4lScI0fku(R#R*>+)P+mW;F!p^oMXWNCHZAZ?w3p?A6oNX6& zwjDX!F6?YOa<*OA*>>b?yRftE$k}#bXWNmp?ZVEsBWK%%ooz?XwhKGkj+|{5cD5Zk z+b-;EJ94&N*x7dEY`d_t?a0}7VQ1Tsv+cspwj*cTg`I6j&bA9X+m4)V7k0KCIomGm zY&&wcUD(-n&bA|G+l8HNN6xkjJKK(&Z5MX79XZ=B>})%7 zwq4lScI0fku(R#R*>+)P+mW;F!p^oMXWNCHZAZ?w3p?A6oNX6&wjDX!F6?YOa<*OA z*>>b?yRftE$k}#bXWNmp?ZVEsBWK%%ooz?Xb_hG$ft>9ScD4gK+ac_12XeMU*x3%` zY=^M39mv@ZVP`v#vmL_Db|7avgq`g`&UOep+ku?z5O%f$Iol!ZYzK0-L)h64K+bjuJKKSr?GSdh13B9v>}&^ewnNz24&-cyu(KV= z*$!c6JCL&-!p?ReXFG(Q?Lf|U2s_(>ob3>HwgWlaA?$1ia<)U**$(7vhp@99$k`5I zXFHIy9m39bAZI&-o$Wx*b_hG$ft>9ScD4gK+ac_12XeMU*x3%`Y=^M39mv@ZVP`v# zvmL_Db|7avgq`g`&UOep+ku?z5O%f$Iol!ZYzK0-L)h64K+bjuJKKSr?GSdh13B9v>}&^ewnNz24&-cyu(KV=*$!c6JCL&-!p?Re zXFG(Q?Lf|U2s_(>ob3>HwgWlaA?$1ia<)U**$(7vhp@99$k`5IXFHIy9m39bAZI&- zo$Wx*b_hG$ft>9ScD4gK+ac_12XeMU*x3%`Y=^M39mv@ZVP`v#vmL_Db|7avgq`g` z&UOep+ku?z5O%f$Iol!ZYzK0-L)h64K+bju zJKKSr?GSdh13B9v>}&^ewnNz24&-cyu(KV=*$!c6JCL&-!p?ReXFG(Q?Lf|U2s_(> zob3>HwgWlaA?$1ia<)U**$(7vhp@99$k`5IXFHIy9m39bAZI&-o$Wx*b_hG$ft>9S 
zcD4gK+ac_12XeMU*x3%`Y^SiZoygfvVP`v$vz@}ub|Pmxg`Mq0&UOkr+lid*6n3@~ zIom1hY$tNIQ`p%~})4;wo};IPULK-u(O@W*-l|+JCU=U!p?RgXFG+R?L^LY3On10ob42Lwi7wqDeP<~ za<)_0*-qqar?9h~$k|R|XFHLzox;v`B4;~=o$W-})4;wo};IPULK- zu(O@W*-l|+JCU=U!p?RgXFG+R?L^LY3On10ob42Lwi7wqDeP<~a<)_0*-qqar?9h~ z$k|R|XFHLzox;v`B4;~=o$W-})4;wo};IPULK-u(O@W*-l|+JCU=U z!p?RgXFG+R?L^LY3On10ob42Lwi7wqDeP<~a<)_0*-qqar?9h~$k|R|XFHLzox;v` zB4;~=o$W-}(fuwoBOAF63;Nu(Msr*)CycyO6V8!p?RfXS;-*?Ly9W z2|L?`ob3{JwhKAiCG2b$a<)s@*)HU4m$0*4$k{GoXSI57jm{s*x4@RY?rXJUC7xkVQ0ILvt7c@b|GiGgq`g|&UOhq+l8F% z5_Yx=Iol=dY!`C2OW4^i}(fuwoBOAF63;Nu(Msr*)CycyO6V8!p?RfXS;-*?Ly9W2|L?`ob3{JwhKAi zCG2b$a<)s@*)HU4m$0*4$k{GoXSI5 z7jm{s*x4@RY?rXJUC7xkVQ0ILvt7c@b|GiGgq`g|&UOhq+l8F%5_Yx=Iol=dY!`C2 zOW4^i}(fuwoBOA zF63;Nu(Msr*)CycyO6V8!p?RfXS;-*?Ly9W2|L?`ob3{JwhKAiCG2b$a<)s@*)HU4 zm$0*4$k{GoXS zPw&Q0ZT!y0&qn-g)H`0Wm;bN7{<`qR=ijt@|EJ^ge)}&!|M@R}``f?zv!CO4_QmHs z{FGNd{}p^wksO>%ICtcii}v-S*zcAAbJP^B;fD{rroc^L@_w@0!k@fBws2XqQ-?#Jcn>7EvGM2~x_`cK0 z+ll`#IPpJXKRsgFKhkonA7I)K%e(r4r~NWpn@{_7mutSA_TNtXZ_>2?nA!I)IvM`@ z)2Hu0d+$%r%zy4u@XA4R^!&*m-}xdoKX|X|FF1o%<_{5^kC2=GL(iaDdG|eL(Ae|t z?F{;M2K{q8&c0>0Y0sd?0RHOv`%nM#b%xhpfBNjBC!hZDzd!xsckSwD&mXx=@HWW* z7hJ&^-~U2Xw!g`}kGT+)`Q7}1FGTas?fYDaj(-?$FGO!IL~k!dH|)Q+7ozXE8++c< cJmykTX8VV@lw{TSxTpF50S4i~^*Y-r0HaZek^lez diff --git a/common-primitives/pipeline_runs/data_preprocessing.dataset_sample.Common/1.yaml.gz b/common-primitives/pipeline_runs/data_preprocessing.dataset_sample.Common/1.yaml.gz deleted file mode 100644 index 824ea255f6650632805690eca072a694bb38021b..0000000000000000000000000000000000000000 GIT binary patch literal 0 KcmV+b0RR6000031 literal 7503 zcmV-V9kAjbiwFp9jU8S912HanVQp*x>|I@V+qSZO_pi`f+6UUiH{hp}x@ntr)10it z>0P&X-NnTZ&}Ji%x+Imx{q+abhmEb+vMkAtBe4&uMg#_fnT^2^Tcp#fZ0(o!ba4La z!};sCzaC|A8#lIXPLF~?R$SP|T@%9E%*rgzN<3mVce2uVQrd1y{K zKPBv#b7NTe=L^^5b5U32$d>J?Q)tVJqO{RxTQ@~jo(?$6GL{lS4W(&DoFO4o%%+kG zXIPwwTv(RqaBhoLf{QR0ff7aq?xGIj%;rHRsM1;rNsM$&Yn$8JmZ^=l7_YWXHK|kE zbaoF0>3A~x;67Gm)7EiOw$0+J>Gi0&j9oA1vXJumh0J~~u5}Cs!Jv#s_H^*t#c%k- z`yU5qRc(Jb|NX~7p#m+W_&7Lw_i^yA0Xf#;ufP4%cY7Y)hSf!p#=~gr;@d%apywCH zmsbs5nMHnNM^$~b2zy(#uF3qn0dWhA8>&dywf(!QKiN9!;GME8<~of7nGy$Y&23C@Nwb=Lre3(27JW#-ueVHt_o03tCVatf#{6d7 z*T(%m+@BBk6$pQF!k?V*wF!T6!q+Bz3#6}2`r0J-1z(%=wJF~_%I~9~nLh>PTcG_x zwLb;zYtz00?JF?;6pXLU__i5en}KMD0wlObI7oQSH-k_F8UqPhGQcNbg>cXU03iSh z0ze>u1R}s{2r`jii-KzqK%!uQKs;SGf%O7fAP5AaKn%;&KO{cQD{lQ7Qh(;oH58RjDal-M! 
zK~`jt>s(z25(f!thERe8B?RD;13ozcEd=|4EszH}pp^q!IiQsT2037mGq4)#Bglgs z%OwuuVZj9Wm23i1F9FWBUSsZd52P|=oB_Qa8%*O#e z9I_jS>?R<)2|$(rWC_5MfLtd4J%WH1D5?d8@nGK+fENLKqhMIq!U(W$3dmprN^JpH z5}bkrI zl7PG_-akt|2ohg)LzO*nt$(FAC}x1?ycwG?l@Y;A^O;6;v!5 z*0hE-tzi{ws8Mu?TJV&Bf+RtLmayk&sFO9+$r|cp4f~3QeMJLaG(#;gI%Eb7nLz`V zG*r|Ykfk9{XxN=JG%fb2N^}f z4x|BD8ght%y~BiXz5~cIkWma&EQVl9e>?-|F;KA>s8|eS8v`|p0W2B7l401-kAng9 z7(kDKykr0q2C|KTY-8Y_%RqiH5NQM1#z3|)5O)J{HxPFNJBfkZV*nEd&h!i(-QjT^ zHfh4+4E~&zC6{^CM#!#bG8XIezFP=+=coZ4p zS!9TZk)c`Yi)JjH86yY@)T&ODC@K`O?-E~uj0Hi?f*>ZB&XNgYW9c-RAQ+ZTi3x_A z2vh}W#EIA#kC6!mbm>@m>0Ei~V3{Z!SD~*os?w-RV`Ds)UOJavI&LNyP>NXvPn?|y z6Qoe15n=)=pd^hR|lj zB*mPAhtp2vC<+vp`XU7d_d`ykC^8fkiVIa8Ir}1f;a&X35(SBTmm@sBv>U6xm$}nt zWXog3@7vDrIeB+|L~e@R=QP`=j@VQ8dkhrpL;RjELY~tIk8kz&vj6qc?b7#>&z{rh zhb28n1>&S{@-b@+Y8sF$8j>i6k~ORIeLoc#HmZ21OzT$5hfQG7bMY};{jdU7(dZQQ*# z;JEuItBcRJ%!($hKHK{0xT-HsW*wT7&*9195#6Ysl*fNHRe993aoyhfmH~P!iDpLp z`J!voPNvVEMr-Z$7@7epB1=Fi!2_y$9=cYU0s&Xrre1 z2S>Vga5{ZnxwgZ&b=F*eJ$=mhbPyhkyA*d$EXs>$@lpe{shkXlxAt_QyR5P(jfcaj zNAC8c3kD4tpZRh6y!?e5?{x6L|2LIq58X#mQ5J0!-Pp837cp~PH^E&;JKhNIu@qRv%r|h1Wf_O8i(zttsKAO}8m=g^0VrZQ$CzsX8 zo{S0yg_EDFbkZ%C=Hy*n{bf`4Auc|}CyQ5`oMdbiTnp3pZ*+*h{-%5I|5}RTI=ysL zJMj%YnHqQk<}M^R`YhipyZXi3k8j?*@V?$Ay`@#zI=gMXJQl9Xx%-OL;li14b{72f zKMN6>)HaLLwtL~h{EY_J`kb7$lR6$ovlZ=}cYHJYeA37BAbTBe-js429^d>#r*Vle zZL9iv@!Y*WR)Sf0PsrW)xUT+wL?oZb$z&MU@2kI$))`?F{X@Hrf_W5Pm2G|Xt}4oQ z{XmYpS6ejGIQ~-Kym!J2n726(ZCrP2rfa=T5WQdd-NZ1hhLchG_+dPoj`PrYc3(%) z%QtVLS8rbZ{_5@dhxsmamkVZ-T6)LFI>2!gnl1RL>D_B)E^NIGI^NaAD6X&m%U-R5 zh5KTMKItxnYxw8gFusD>?QmJ0xqbeks=Jr#fYBSMdfB#hkxbf^8FKd?Bx`u^?PQeL zI%;g}HiYI|;_MqaiLT+KU){e=H15Eb9iWTY8ZLU-x-zFdL;&$Myz_o?kZVh<;hNuy zgG7y7!!`b%_2kYxw8=Pp=QrHDsNt`oH7h zr-O9PItB9wXWA!QO%8I`BI{JkKgI3ztT%Qx#}{^EH~liK;`V!V(mM6>zsu>dU^GtK z!*kF&CG&^jWO`hFm>ycEa{i^T!)(6L4$njD6wvRgrsxQO2YBbO4v5yNvu}B=@bh)# z2bVN#)7tDjt}pCXz}|s|dSbuQ@)Tj6-5|`umltg`nhe__n)8cCGb>h-pHlGk$j_yh zUf-j*V<_cRv9ISX@452bbLCU=Q}Q*r^3{46qr0ZY4N*eeL5({H zP1}HT9g*{f;;o@~CHtdOB!rR~#Pi(-S=Fc;djA zoSw2-%N0F0^AK3hZnt37 zqmVOAw=Y9~CEu3Ef2yjXjZ0{;^@6>>z`Soud9WeV&VNjo%!(nLjrP14-Q~#nyPY>g zP;K2}E<4`Otv&2DSA0juN%Jt+J%}naj7qU1s?h1;JD>_F4?}s^L$W7L|Ipgq1 z!)-R4o-{mY($Fwr$Ofcgw-v2cOw^pS*d1wTnB0&wTt&UjyE^Q7Zoj{qt?R0esxm4o z*L^%$qKkGw-SyE#h8=9 zj_I7rw92L&(6~rH0Rb_$;^O7o-|~}-m-GK7cUUxM;2^i4hqlb1%LBT_*Cn5&*Vc}1 z2-T;g`Y7k2yz4!$52X4!U2M(AFIO)+dZVYO`g*Fbr}}!T@3~fgYt_r;WHi1yp6+Op zZ|bx?B>i^cm%XF^cH)(NA_8~fkiDSb9S~`{YIu2WmJsD6NiB-=bW^*ohtC>&ELPft#cEPx^si6 zi7N@<;>*{Dku?-I<6+T0STkLr`EOfXT((hO$LaLXGJa+HiYb?o%gDv34ag!zYAnMv^DRxC&8_neo{%oq-Q8Xt#b=_0f z_XUpK61JYX-V5fszh&4wd^Ri6+l+bY7wz^j+*re7-}X^&gWuq;<wmd>;ke^>rst zs}(cJ>8|hkjG4c)|Es&c&3@GRn7<|DTnV$If~?b}!(9|)Mvxu<9s9l@+sA^8-V(OH zAnOaVgWk-avLM@ZM|WF=)@%Ok9Cmbh&yL=mxeBtujvfR@UE;D|oaXaaAI@LD{q?Bx zb7NblBXF6hi}2~dm^_sRbRaq!mM z25uc?ZgHhdQTLr7oXmW7D<(YP4fp+kRX#3Chx zxyjNnO|6YnsWVF{qlrzql`3?L-iDctO=4tDtjL@>d2VSYa+_#t70uH5I%Ha9oY6Qi z+OUATwG$-7gn>&S)>vjtmgGmyBcF?E(nJeGx*xBd&NcAZP?HN1BswF3ND`+(n>3&) zr`owe85=qcl`G;#{sagWii+6Ac|yW0v?9@pF=2dkH_y7L+8^0xb-+_+FymGYUSV!uKwffHnyuD3CPR zsbK^SI*osgg&ZWmB?G@O0@o-739d2F34UR)7MKV@0?VX7BL<6u0SXw*mKpyV2b6QL zlY^a{36Nkla{>~Kz@Z<+*43yWK|jGj0$acnTtPB{mBV4JaO2w|AWsMqg2X|>mcEw+ zL|TA91p}SnB>{dBkoE+iTu2yQ!Uz&ts{gciCQEMHKp4K~DdsjOSp-00IVR)eWXCDH za)>We)G}qoW6LGShvezQMH2B-98w~|8CA1&Yf;4VV{7Vb5MaUR*D&6-K+8{4!yK+* zj?^&vHS`w^<6T4V(J(7&sJo6KhXiuakS4UPhM7-W$PZdtJIKNLIhZH{%fHV+FE9eH zX|VkI83}2^0%@SX7^%R5G(k@TdK#E94a^e;Mu*XG4OdiHa}4NdV8%2Se+fCjjAEgm zSr}Co^t8~IEUXTY3v^=9jq1(lyESkIG9l!j5uc?O;}}|)nGx%EP88^ zMiZ|Jbml;t%*1=4=VvC~3mRl5CY4}PAW>#wsSG9sJw!wnuflvI1WYlRi5@9>)&l0@ 
z%!FbHiT6sp7qzOnutH)|FvLX7rxh`lIYyrvbru?bjIONESPXP+Yu;?lovnGW!eYu6 zOXW};Yy}CfTxQm;qVo@PeP-fSYy}CEcxGbC3iDN%6zVKvOe)5EQFe5OvxgZ>3j4Ff zrww$SGZRaN4;&KPNE*x+byg8RDn+%ckP&*#LP@+ApU710kqrB@0tYoStBr57o7?Et zyV%>3K~lLUOHQhO*HkhJ7OQ)(#1)lg%5}J=K2KfGmmU4o)n$vV>#}&Y4{NcW&!>xM zv0kXQ-jJlEs7ja6t*S%`q)J^AO!5T6fs@80isIX)TZV}v%x;3z-fpeXphpo=7O z#!4yHPR6k@E1T$L9?y_kx0Wldgj1`9TgmmqO0CsG8E4Z~8n3iYR_geNX?~fz%UPswujQV)! zRwp6W`6u68TXmOvzq{>4tnXjVKlQNdsxO}2>)ErL&#?EhwG)@0Z`<$pkLhClcyi-u z8S25@o%%#d3GuNScJ%=2{L>KF-QAcXQ?=qjJ9~KSP z979~>cX%i1$(1KUz>8|O1QVAf2@wijBhlLke^b8Fzi+2(pUY2V>6RRhh zxG7w`TcZB0TnihA>i&coJQ;o4CPi7Ra{3v!)%RiIZ}Y%kj1@03Yl}7RS?m0Abb$(m zH!!XUAM&-OtLn{fJ{cf_xIlzzA*858MQhqZ(aW;5E^djlmoIJ&4pFyDq$}^Go=syW z^-%R8tAZajS}qK&!SqJOg#%ZWvsS{r)7G}gCsrq_bVx4Qn3mXBDH(PXSWp+gOtSq| z=kV>`r8q>u*Lr`z4N9-eXN~v!oV>)^cOG~Z*e=uJr5QhVo7yIuht1kr^j_SO{&*IIwG%sNF@guA=hFMl0O_{na) zO7k5$C(j@qUemtPul6nM-B@kLcvZdDB;ZCnkCIm!zr+s7@isH|9cRCO{b%>@A6tJv zT!5XC-rpvV>vWfo7;l#v$&(iv>e=D}(AY)UwFB=mO?(G=D(5tWSCHVL4dF#1cY;HB zi6|ZC5MCWJXFG(KhRi_^;iaK;>O**SC>;S2UL9KJL4?Llz7NfYZL6ZHVFCQ^al!pQ)Z0W1Sp2CxiZt2Sf+%K-LT-9iSi*DDeO*iMNU zz%qdCB@F{u2C%)U$NfMo#70QO1+VgTEv5ChmQg&4pxfMoz18o(x2 zTWN;_*nGEYtO5gACu7+Iz`DrA@iQKScs-1Xk3l5Gl_m|4m=rzN`On0V5VN1Nw3rg) zVVc9(hWX4nGE51?FmdS?Vlrrj84AOav!$3G6=D2wjuI2&91Jwhl7h*_C2BFExPC-2 z7K?C7MIl&(i;N4)BDB(4U(O=5N)AO~7QrlnS;Td+2!9I{vxwGVF+eJUEMkBfw0Wy$b5d$JNJS7;C^3s5f{>vXLDV3_F@mT;iD(27gc92bA_y(Y z5kwL)yd#WvMu&Wa(SsNR2_p(U77|7mT1+I2CdAlC7*U8Zk}#r7kClYcjj730sfku5 za%wm=$@g6t8JL2ekyiwFoiDkok518`|@Wo&6~WnXf2ZZ3IkYyiaEPj6h;VaDOTJ_UD978A_5 z=ihs@s*$FtfjB5qWz!8Y!@0DHN~AziZro4bkuAHygDwhm(UpjEXEZN$@8E$pzq_Z$ z*Zps=Z*SiH;a}f<^XEUlxtv~4FZ=7u+c$@s%fom3%k^3*<9?m{x;yn%_T#yh(vIiL z-q!hash7KQnd^QZ=a$R;<#;*ee7$fyosReA`kJ&iH#ZNLw>N+K)A9Cn_kO?JKJS-L zch^6^{pQcN-~Qp<*Z=tSyRUD*eDdwPzkL18pWp0{*LQri9k=7*SPolxSH{~`-`4TF zK8|bEfBWkCk{=$PpB_K#kFRg9AF)4v_wcyi{;)s4JUl(Vy*cmc^|<<~+ohdWJMX8~ zFQ-w;+D>a5=li>JxtuRw)+MvvUwS#T<90dpb7_anc|5ecZJoAbNJVgK;-{Npdb?r^+o z^UyBG?QrVZ4)o!9U3+nay-=H1Va`=|H6JkZzsdwIBfy?$kWeR99w zPnZ4qzn|kv{qW7>W&hi+2m24#C*tk{~ z2hD@-!SG;u5Dyjy<>-M+i%W}3i%W}3i%W}3i%W}3H&=szz0fsv1=_s%li#sH#y_ zqpC(#jj9?|HL7Y<)u^gbRimm#RgJ0|RW+(=RMn`eQB|X=Mpccf8dWu_YE;#zs!>&= zszz0fsv1=_s%li#sH#y_qpC(#jj9?|HL7Y<)u^gbRimm#RgJ0|RW+(=RMn`eQB|X= zMpccf8dWu_YE;#zs!>&=szz0fsv1=_s%li#sH#y_qpC(#jcOXzG^%M-)2OCVO{1Dd zHH~T-)ikPURMV)YQB9+oMm3FU8r3waX;jmwrcq6!nnpE^Y8urvs%ccysHRa(qnbuF zjcOXzG^%M-)2OCVO{1DdHH~T-)ikPURMV)YQB9+oMm3FU8r3waX;jmwrcq6!nnpE^ zY8urvs%ccysHRa(qnbuFjcOXzG^%M-)2OCVO{1DdHH~T-)ikPURMV)YQB9+oMm3FU z8r3waX;jmwrcq6!nnpE^Y8urvs%ccysHRa(qnbuFjcOXzG^%M-)2OadU8A~2b&cv8 z)itVXRM)7kQC*|DMsKfHGs%uo&sIF06qq;_Qjp`cJ zHL7b=*Ql;hU8A~2b&cv8)itVXRM)7kQC*|DMsKfHG zs%uo&sIF06qq;_Qjp`cJHL7b=*Ql;hU8A~2b&cv8)itVXRM)7kQC*|DMsKfHGs%uo&sIF06qq;_Qjp`cJHL7b=*Ql;hU8A~2b&VPtH8g5y z)X=D*QA4AKMh%S`8Z|U(Xw=ZCp;1GlhDHsI8X7e;YG~BZsG(6qqlQKejT#y?G-_zn z(5RtNL!*X94UHNaH8g5y)X=D*QA4AKMh%S`8Z|U(Xw=ZCp;1GlhDHsI8X7e;YG~BZ zsG(6qqlQKejT#y?G-_zn(5RtNL!*X94UHNaH8g5y)X=D*QA4AKMh%S`8Z|U(Xw=ZC zp;1GlhDHsI8X7e;YG~BZsG(6qqlQKejT#y?G-_zn(5RtNL!*X94UHNaH8g5y)X=D@ zQB$L)Moo>H8Z|X)YSh%IsZmp-rbbPTni@4VYHHNfsHss?qozhpjhY%YHEL?q)TpUZ zQ=_IvO^uowH8pB#)YPb{QB$L)Moo>H8Z|X)YSh%IsZmp-rbbPTni@4VYHHNfsHss? 
zqozhpjhY%YHEL?q)TpUZQ=_IvO^uowH8pB#)YPb{QB$L)Moo>H8Z|X)YSh%IsZmp- zrbbPTni@4VYHHNfsHss?qozhpjhY%YHEL?q)TpUZQ=_IvO^uowH8pB#)YPb{QB$L) zMoo>1Mn$8dQPHSqR5U6Y6^)8UMWdoo(Wq!tG%6YujfzG^qoPsKsAyC)DjF4yibh4F zqEXSPXjC*R8WoL-Mn$8dQPHSqR5U6Y6^)8UMWdoo(Wq!tG%6YujfzG^qoPsKsAyC) zDjF4yibh4FqEXSPXjC*R8WoL-Mn$8dQPHSqR5U6Y6^)8UMWdoo(Wq!tG%6YujfzG^ zqoPsKsAyC)DjF4yibh4FqEXSPXjC*R8WoL-Mn$8dQPHTSQA?wiMlFq68nrZPY1GoF zrBO?xmPRd&S{k)9YH8HcsHIU$qn1W3janMDG-_$o(x|0TOQV)XEsa_lwKQsJ)Y7P> zQA?wiMlFq68nrZPY1GoFrBO?xmPRd&S{k)9YH8HcsHIU$qn1W3janMDG-_$o(x|0T zOQV)XEsa_lwKQsJ)Y7P>QA?wiMlFq68nrZPY1GoFrBO?xmPRd&S{k)9YH8HcsHIU$ zqn1W3janMDG-_$o(x|0TOQV)XEsa_lwKQsJ)Y7P>QA?wiMlFoG{x|CALGfVopnA|e z=pGCYrU&uBrNyPirNyPirNyPirNyPirNyPirJGAPmu@cIT)Me*bLr;N&83@5H4wnv>4wnv>4wnv>4wp`sPM1!XPM1!XPM1!XPM1!XPM5}|acNu{ zm&T=WX&_sl~lL(r$6DvlF0l& zA{mt#5pEvo5FoJ3G9#D2E^~8Rzu426*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV z)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Q znb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r z*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV z)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Q znb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r z*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV z)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Qnb^~r*wdNV)0x=Q zndo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5 zXQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJ zorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^ zbS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt z)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyo zPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#T zI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y z=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9o zqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65T ziB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaW zCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Ri zndo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5 zXQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJ zorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^ zbS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt z)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyo zPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#T zI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y z=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9o zqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65T ziB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaW zCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Ri zndo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5 zXQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJ zorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^ zbS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt z)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyo zPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#T zI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y z=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9o zqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65T ziB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaW 
zCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Ri zndo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5 zXQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJ zorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^ zbS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt z)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyo zPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#T zI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y z=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9o zqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65T ziB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaW zCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Ri zndo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5 zXQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJ zorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^ zbS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt z)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyo zPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#T zI-QA5XQI=Y=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y z=yWDJorz9oqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9o zqSKk^bS65TiB4yt)0yaWCOVyoPG_Rindo#TI-QA5XQI=Y=yWDJorz9oqSKkM9ifT2 z#iYfhC8Q;#rKF`v%Oov(X_1&sVm67{BxaMCO=32Q*(7F@m`!3fiP~ zW|x>=9HLIVor%UCFYcvQ({hu zIVI+lm`h?ViMb@^l9)?kE{VA$=8~98VlIifNz6@RZW42on484hB<3bDH;K7P%uQmR zB<4wCo+Rc;VxA=CNn)NP=1F3nB<4wC-b>7TiFq$E?OCF?jacGe| zNb6#5k(eb9lDn8&BxcEj6ffo$iCOX>(Tll7VwOBe_hN35m?aOAzL;AiX32xpFAgn| z2kBqTEfTZjK@u2qi^ME>kP61!A~8!IB!n@yNX(K4X<^JQ60_t%au{=q#4LG`BF3Rb z@*qu&xkX}@JV+K}ZjqQJ4^qaMTO?-5gTyiB7KvH%AbpIvMPimbNFrlyk(eb9Qpq^9 zNFJnNsYNhVwOBeRby_Em?aMq)|gu)X32xJHRcwHS@Ixxjk!f)mOMyd z zC5}Uj3Ga760_t%QXX@Q#4LG`n#bHCF-smK=rOlQ%#sIbddw{nv*bat9&?Mt zEP0T!$Du{?AZ?GiMPimbNZw;^k(eb9Quvr#BxcEjL_X#giCOX>osYRiVwOBe>SJz^ zm?aNV`#7{n9;Ej%w@A#A2T6X+EfTZjL8>2fi^ME>knqRc!k9^a%q@(W_{ZGBn8|<4 zEsU7}$lM|^OCF>FGPg*~k_X9v%q$+#)ec9;62{w@A#A2T6j= zEfTZjL8>5!7RiHjLFN{TS@Ixhkhw)-mOMxuWNwj|B@Yq^nOh`g$%8aP<`#)r@*tUz zxkX}@JV+_z&?0$|R><5UF-smK7c#d<%#sHwhRiJzv*badA#;nwEP0S_$lM|^OCBU0 zGPg*~k_V}W99kp~(hr$iBxcEjBt+&GiCOX>6_L3`VwOBeNMvr2m?aO=5}8{hX32x( zMCKNWS@Ix7kwc5*L7F0Si^ME>kgUkuA~8!Iq%1PGNX(K4iHpoFjG4U1+`^a%jLa>J znZ(H4!kCGS%q zknG6ZA~8!Iq&#wHkvvFyWNwj|B@dDxnOh`g$%7O~<`#)r@*okCxkX}@JV=LRZjqQJ z50WC8TO?-5gVab4Es_W6k<2X;v*baNBy)?zEP0SB$=o6_OCBUlGPg*~k_Ty%%qLqiF#4LG`fXUp#m`Rw-EsU9n z$=t%2$(YP7jG2(h+#)ec9;9V5w@A#A2g#YtEfTZjL5e1Gi^ME>kf_PrA~8!Iq-!#_ zNX(K4Nt?_q60_t%>L!O4$%FJw<`#)r@*s(mxkX}@JV@naZjqQJ4-z_=TO?-5gS1ZO z7KvH%Ai0ydMPimbNb%&*B6*PJ$=o6_OCBV9GPg*~k_RcD%qw@A#A2dSVOS|ktBL77`5X32x3Q05kiS@Iw?l(|J>mOMxhWp0s}B@fa> znOh`g$%AB3<`#)r@*riDLyP1=+9-32#4LG`Jj&c6F-sn#kTSPO%#sI*q|7ahnM}&u z!k7uA%q@(WRLb1Kn2Dv#EfTZjL3$~3i^ME>kYvi-A~8!Iq?$6fNX(K438&0060_t% z+9`93#4LG`e9GJ+F-sn#pmJ!DJV--jZjqQJ50X)tTO?-5gOpU}7KvH%ATgD>MPimb zNKa*Mk(eb9l2n;nBxcEjR8kkZPbMe-o6mAOS?mOMyqWp0s}B@a?unOh`g$%8~!<`#)r z@*v%nxkX}@JV<(FZjqQJ4^m$_v`8MLzcRN-%#sI5u*@wIv*bZ4EOU#*EP0R+%iO}4 zNwLf=jF}kA+`^d2vCJ)unIOyDA~8!Iq{%Y3NX(K4$+FBX60_t%$}Dq>#4LG`ILq83 zF-sn#&oZ}2%#sI5w9G9Mv*bZ4Er%A#gLGQv7KvH%AgPwQMPimbNUddVk(eb95^R}U zBxcEjG+X8tiCOX>*_OFQVwOBex#iFzd60I?+#)ec9wgr~w@A#A2PwGBEfTZjK_V`5 zi^ME>kdDjTA~8!IB;_)Ib7+w~NDF3ek(eb9l7pFBBxcEj6k+BTiCOX> zQJA?!VwOBe7iMmem?aOAhM8L=X32xpVGb>l2kFDiEfTZjK@u@@i^ME>kV?$lA~8!I zBos5ZNX(K4X~oPf60_t%axrs@#4LG`V$7jM@*vHaxkX}@JV-WXZjqQJ4^obqTO?-5 
zgT!Oz7KvH%ApMxRMPimbNJ3_Ak(eb9Qjs~dNFJmkGq*_0k_So2%q;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWK zo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q} zcrqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^> z;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pk zi6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRD zCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Et zn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{Whn zW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQ zjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJ zGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*? z$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3Fm zCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWK zo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q} zcrqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^> z;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pk zi6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRD zCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Et zn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{Whn zW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQ zjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJ zGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*? z$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3Fm zCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWK zo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q} zcrqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^> z;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pk zi6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRD zCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Et zn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{Whn zW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQ zjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJ zGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*? z$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3Fm zCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWK zo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q} zcrqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^> z;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pk zi6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRD zCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Et zn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{Whn zW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQ zjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJ zGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*? 
z$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3Fm zCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWK zo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q} zcrqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^> z;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pk zi6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRD zCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Et zn0PWKo{WhnW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{Whn zW8%q}crqrQjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQ zjEN^>;>nnJGA5pki6>*?$(VRDCZ3FmCu8Etn0PWKo{WhnW8%q}crqrQjEN`X;>oyp zGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma z$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2w zC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%y zo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Es ziznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&nj zE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JD zxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{Wnp zoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{Wnpoyp zGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma z$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2w zC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%y zo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Es ziznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&nj zE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JD zxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{Wnp zoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{Wnpoyp zGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma z$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2w zC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%y zo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Es ziznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&nj zE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JD zxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{Wnp zoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{Wnpoyp zGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma z$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2w zC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%y zo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Es ziznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&nj zE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JD zxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{Wnp zoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{Wnpoyp zGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma z$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2w zC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%y zo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Es ziznma$+&njE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&nj zE}o2wC*$JDxOg%yo{WnpoypGA^Esiznma$+&njE}o2wC*$JD zxOg%yo{WnpoypGA^Esiznma$+&njE}o2wCllhygm^L`o=k`* z6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcL zOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~ zG9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=` z$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW# zCllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L` zo=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB) 
zcrqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F z;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK! zh$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^ zA)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhy zgm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`* z6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcL zOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~ zG9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=` z$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW# zCllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L` zo=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB) zcrqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F z;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK! zh$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^ zA)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhy zgm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`* z6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcL zOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~ zG9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=` z$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW# zCllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L` zo=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB) zcrqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F z;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK! zh$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^ zA)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhy zgm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`* z6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcL zOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~ zG9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=` z$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW# zCllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L` zo=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB) zcrqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F z;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK! zh$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^ zA)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhy zgm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`* z6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcL zOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~ zG9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=` z$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW# zCllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L` zo=k`*6XMB)crqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB) zcrqcLOo%5F;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F z;>m<~G9jK!h$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%5F;>m<~G9jK! 
zh$j=`$%J?^A)ZW#Cllhygm^L`o=k`*6XMB)crqcLOo%6w;>o0VGAW)+iYJre$)tEP zDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0V zGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre z$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|J< zCzImIqo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+ ziYJre$)tEPDV|Jo0VGAW)+iYJre$)tEP zDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0V zGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre z$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|J< zCzImIqo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+ ziYJre$)tEPDV|Jo0VGAW)+iYJre$)tEP zDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0V zGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre z$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|J< zCzImIqo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+ ziYJre$)tEPDV|Jo0VGAW)+iYJre$)tEP zDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0V zGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre z$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|J< zCzImIqo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+ ziYJre$)tEPDV|Jo0VGAW)+iYJre$)tEP zDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0V zGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre z$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|J< zCzImIqo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|Jo0VGAW)+iYJre$)tEPDV|JnbFG9{i&i6>Ly$&`39C7w))CsX3dlz1{Fo=k}+Q{u^#crqoPOo=B`;>nbFG9{i& zi6>Ly$&`39C7w))CsX3dlz1{Fo=k}+Q{u^#crqoPOo=B`;>nbFG9{i&i6>Ly$&`39 zC7w))CsX3dlz1{Fo=k}+Q{u^#crqoPOo=B`;>nbFG9{i&i6>Ly$&`39C7w))CsX3d zlz1{Fo=k}+Q{u^#crqoPOo=B`;>nbFG9{i&i6>Ly$&`39C7w))CsX3dlz1{Fo=k}+ zQ{u^#crqoPOo=B`;>nbFG9{i&i6>Ly$&`39C7w))CsX3dlz1{Fo=k}+Q{u^#crqoP zOo=B`;>nbFG9{i&i6>Ly$&`39C7w))CsX3dlz1{Fo=k}+Q{u^#crqoPOo=B`;>nbF zG9{i&i6>Ly$&`39C7w))CsX3dlz1{Fo=k}+Q{u^#crqoPOo=B`;>nbFG9{i&i6>Ly z$&`39C7w))CsX3dlz1{Fo=k}+Q{u^#crqoPOo=B`;>nbFG9{i&i6>Ly$&`39C7w)) zCsX3dlz1{Fo=k}+Q{u^#crqoPOo=B`;>nbFG9{i&{lJs??C`&Tx&8X>-R-kKy?Xol z_RWjiAN=g4dj9Lp-QDZk&;HxnKi$52^Y!hUyJzoizq)z#=KZs~+js9?-MxMHXZ`8l zzkT-Nk2i0=x_$Qc%SZj(X}9HH@7~?q-G24z#j}^Uf4Y7B_M3To7TBjh(evu<-TPnv z^SeX$`@8oK*kM1ri9hxSF13Z9e{6bo^WiMId-mehpI*Iwb@S%9V3s*?mWk)#r|ZxC z=H)Kazkb4-&)(l1VtRJ-;_$oA=DW$=&A;FN?YqdS?nVP#&nvsCPIHu&xx|*aH9Tn7 z4Y0pYy;tXL`R>o$Jv-ce-n{?v_T8r+fpcW&6`dE4yo^V?A}=1#0GHE2HHNwxHW=l?Z-hE1fK23;cd!W1g zarl&-j=+jc4DC$4+uClXWkD){GnT()WRLUr$TULyT}0jC3qU*l+9X zImcDHuB{s3XIC`;IOH&8fLracKX=rqee6xW#H`l>yJ?x7zJ_7M#b&Mtm?A&NZTqrA zs*~P{)|2xeej%N68_%5-PkhC!{STl%eo&H4;Djhd!;AYmr)u^OTJ&JEmLD2|3)bd3C$t#O0E! 
z7%g0E3F$RlNMq;Y72QJCbI6yx(X4Ie@xp_<$1X@bYF_bzm;9JhIgeTn@Zd+e>$^{m z+_RoNa@qIxBln!_#`LM9-ccSEhI_aleO~RX*Z=7f%cW23G1Kc3`)vhdz9;aKX!)Ka z_vJYg$0#T1cG`?Sz8QS&&$WNT2+<92hq>%sX@mlq?PovzXRqgoUVckX^B2$kDUY7) zJ8D|(A>q9LC*m#%<3=M_G{qW9~Wfqc+`y3|y{;(KR=vBYpg@@)t++L^KH z)9F$@k9|*dZTn-m$Fp_zthIdekr&F?vu{u6%sla?j<8f?``J%B+pl}1J#x=__V1Vr zir2IM&bsPm9R)Mj)^mYLwF_KzV@@WHuy0TPi_4$0XM2Vnx#!e3$;(d4o@%aiWowKE zU;43h!R__D&>nk=bnNJ^VNiC?O6=uNx~tf~gFO_i=&T?3%pBl!UeSTPhFR$K?Cx_$ zdj3c1r?j81Z=alXKaM*yaD9jP{Odi>I%dCEem=)^#?kwJl*~3>9`T#vAE&5!Wj5YXN=Kv3@PxZPO^OeN4^;~{tc)_9cWlxn;oxqoSQvJC|M!D2R zyYZbRQ;)VY2cPgh<@CbXn|d7!pO#$|Hrh#gk&?r+onY&FE4ZFJ!kT8UpX^EVWFP9T z?npkVnT-LC-r(26nsARK<#qT9ksKTznzZUGeA~bVA^)g@5)ztl&`I4p|f$6mt)(R`zhm_ z(*(Z_rY+Xz{Fd2IU-i^u)6=BP`n(gBdtO zdG*RV7QdP=)-&vvKyCNHr`7Yc2dZN~IG^u(Ge@{$hn`Hhy4jHtMq8t8>tj>pWyUe% z6kd!y5?{V&?5~&^{75{F$A~pe+zeGOaI!kfF`s-d(6PXS;nuVDjNWtg6($cKm0b9Z z>Iwo`&okL@PwoR8qqD9!dSrS%Us3xqXW2Ps1=sV8xSmJ{PK@W+cbEGPg5?k3r5@0i zpDG=$KxU+5x8hsS^-U{|7SD>VspWeXm_b?Fu9*qSmMRZ*%I#MZbuhh=``d;uq_b1)}iLsv-Ofny0I_0q34K3xfUP&{IZS*pusMj@z$=N>E}MJ#&fmbGk>ZKe8K6ak-yXYaZ@%6 zJWs4Om9Aj=X5gMjo@k>Tqy>`5O4qPGwbz8hEPlMi{eLBoA?KV&81b@P@1Sq&tsVO) zjd#@7IbnbLvHQ@c&7x2Bt9^#AfVa=yjjqW8a#`J~Kv( zd%HfykNd1eZP#!@E}020{!sYn@33K=uK zEOcU?a1qMzg|{bpFSGoyUA||b5$x0HS!Q3Y|D6?x9*IzX8s|M`b*CaEOTGD z(2Uh+7ty79R`N)D;+_*;eNXkodEhFzb4-YhH{UV#v&VK99DTd5@8RWKMKzZx(>~Qu z^|)-iyjn2A`8VEHjBtNlc#+Kj#lffl#NqCu7yR~_N6)hj@$EE=r?(SlswXr%JaW(2 zgO-uD=t4_57YhIUchPgcBN;BeUHEib^yIt12zk(x=KRaI#~a@ddZDah4~-Y=^+@&O zugBfy^Hy_5U)^3k=`;B9-)eS^(^V@vG0t)O9RNfaA;zoexZL~HQ@n7v^g^7|zL?*7 zww@t+#(C5~%XU546}j6OW~i~!diLe{zIo^@-yXi~yMfVv?!r_2W6%1o^;SM~f*pFU zeaXGOXaar`XT%8elh=Cgt>Nrl-#%IR74X^KDUJW8X6T8+3%s%$>-$8XFZ1D;+zGe-Bz+t{iL(w4VKUrfXxJOSncGWS;DXd(JAtbzk=v*g>QIiAV3d&p$Su znOr~W&pdYAf2w!1Yg^Mc-i+jEx7us#IpZpV$3NqZ@jW`r_l)&Od(MR?%dQG~^zHic zZ|%=r+%)#w;q|@QSi`t?w7szW4SJZ_;6+l%*!PI#kJ{C}DINNLJJKB1(qHYn{QevB zJRWmP)}4jd9sQ_}Bnv%Q40cssZocV}vzUucEpPYQ%b(4|vbtxy%GQl6FLp$hTcEzS z%l4%6m}~e(iXZ=kIRkvxeE00IA#dJ)dHe2a)_?zOq@S|&pYpTU_w+dGT8ot@f#oli z5l`!_XT1+N>$zGkHj6geE;;9CAN72@)DU@utKgIFw~rXoFY;hN=2xm5H|KK6#URVv z@J@PdUM#15@)>gODYkKT&E=*EFEp!rQJMeJ(^01_GZ~2VknAloT!R$4puctioFxs&gx#x1%+t^q3nx1k8eo;QupFHW%Wj&YbLbtz> zN)blbRcpGluHv3Mz{1th7O<=mfgOF~`GS|-$oH>n>lyttIscr~*!y#YzjXR)*2ldb z8FuFCe51Y5ZfTcZ!#(y;bFJIAp1-=*nGi5mm_A+c#l3K9z5ncyfZD(TmE)7a?kj+TPL2& zUUb+!+8VDBKH-FkdBm0agFibgzOBpmY-8;a62J(vt7FasjP_R_bh{ms4tshh|4{jQ z#mQ;Bcc<5}edkZcOTMKZ;%qqSgYrsWmJa(?`qG#8Y5?Cgd=JC`(;I_5#f%x%rFxbt zVHshdICu&eV9N)%a}O|g_Mi~Xxr@(N8+c~jVdS1Qyb~Ycj&-@a&!?P>|NZvn<^TBh z=G~$1{jRK^5%=zuipRh_T_3edXXoqhkuLeMW=etUeSN@gEZ)>7Mr13+nG988eC7LS5Y-{#`(qN zkMTL)-K}&&cFd=utC%#nzJ)@gTx-imxT4$ez&%4vmR`j{xUQVL&Yl&WE{pe^YG&ia z)0qR!>74bA`2Z_iF7Rq$+0_OMoq0z;ubg;vx`6a{;lbxRCo3=E1TEXK_?dIIZ@e35 zLj5`)=f<43cse~xzoC2r3zM#`XMpLjr+wF0b{IT_K5;cUC~_hnyp-s`db%gkEj`YL)>IT#qAXW-YkE1I_& z<=Oe{U*9zJMAa%v=B{cD#`;2D_gCtTT7EuSF5h~teJ`%)f#bA2M-3$|_Qk9#A2AGd z&=&aMF_#;T8P&)=L(dqFeG@rx3B~{d)f(P^E<7+EZLQ$AuSdswG8ww(^j~j1z(%d+ zp1*w0Xj}Ib*71&Z1V{LUk$cv&bt4?f>%20TIR~!q%v;YEjc}E%^{g9V`$o7Ij{eYc zf&G6TSYqk7awF{EYn^IY_J#AQ_RyKXcx}-+$^pK~Z-D9O5faP@$!qxm+44O@+&51b z-KSgzpMU&&PiqioGgynnIHqr9%zcyB!m4w_bnOy zIgb0-sceax*vZ~oopNU5eCx}H$l>FBCG?=HG%h`qImgrUId3(ea%=}3u8ucsU)y5H zWu`Jm3v}R~Wwv|FQ|)5wX2-tkjc}J5;S?5G*>iQ#u&1c z+}|B|me)nAm=tS{ihnwp7?VvH~1Prc^+XlYn?r7-E%_>J)Xps zF~YfX#>GmPJ4O#4o1ABj_K83B$S1^Y3j7nFyhpfzFD?6xkd!XI-g4A+^$&f%KIa4I zWd}#+pQ;|_;o)pgNn`H*t!L{Qqi5`=sP)|1#v6)^b_R{MMWfC`jkcc#y}r-#*`G4; zI_T8ZQJZj~?L6z@{jp*Gal^v%)we!;&gg6d+O222`K0Sh2W!3QuV*iu>}(of5!~`e zYU|m0#=l~h%ymz62>pZR{zDh*S^w+Vvki;JzbZ%Wx%@Tx1d`=wk)HS?2L^FO4o 
zd(gku&%X4c#WCu^E;Q|PzT4v?2Wac;IsZ8}=32U8M>T`QvwY7=a?`aul|R)JaHR)w zJ%?*OkzRO@43-hEuIDmVz)_>dbBhnBexf+@T;(cWm0srb8D|~*Ie&T$+ket~%pzym zmT6CN1)Om%@Cn^~2BG^(c2#P|GiH8E@uu z$ydQ6yww=*$c-`|Ji->A_+~%mfaAnHBhA2_GR#FE+6EXB4KdaqWeLHePn*Nt*9Q1Q z7oVDW=>22SQ|7ue&xLx%8JLdumqr*;FY-m0k3CzKxh-6JcWmeR z@C!{PF0_(i%+-jiT;KO}ddA7@XUz|5S}-ys0eKBeaxOgyd$Doig;&mg$}`_!2YS&( zIorNC+oiUiJu}W5(}m8A$9=)w8Mpcj-@v_|!*{V9Gw5kK!g8KPZ^*;XHeAs!4KQjS zY+B=Tm)}K}%AN_D9(9Uml|7FfZ>(XAIPfN};o)QT=je)7Nt|}Zd->_N(H=W5{T6c7 z(}82(-q!i81xMY=PIJFoWc$X@G}ihMx}Lv!Z9SviuNTb-Cw=x=N2SrVPmm3??=H=% z4*iP!JkKWAv1)AjtgxP|_&UErc7*4QrG9Rpz5gi}?a_5tYo3gmqyMh??%82`-@O0w z_T9ksotJ_{&iz$5k9;n^@Hg=cyc}uRo8I6fl5y^cXI)jk@YCG6ZzGr4yhYzdp7dt0 z%*gF*Z|zQXdmb54A9LDlZBeanqG9{#t9iLL^uD<08>;gkz)yLe8}oEpbUt$IsgI|= zBz=&9)TjOQ@`J0%987&K6 zXo<#A$^Nw3*-BP;Ja!m#rjmuh-lU%3vRe9W)&OnU#%G1KtxFzfT4&JpFC$Lbu6)DCnv;JzJxj9YXC6)tGIm+M z=c3Q$W4?SG=ckuH9acKOaiz02V;-k--b5e!s$`L;)-g|nv*xEglV;Jvl zFxqY$zlY9`}OB&duydVD}(+9-IAwzGLC(*E>X|- zsJ*yYP4;Cbg-#RlNCnU5DA_yv*>Lt?MkpdXe~HtzOhTRO)EVX4Tj=@xDV`oDeV|&x zQ&@6>WSH8`B_=S=F-Lm&o7LI=;+RiqXM0RK?M&Te-kLA-Z5k(^-yick&7RzY^?K&( z#=O%kaML=~4dCIon3GPltwT8+;SO_wccaS&c}4Fu7JoZ<&Ruo!m9r~&-Wqj5#R%JS zvg0yj-hNVph70h}mwKo>=aq5(-E@-2`fFdZ{9wNB8U`9qr)RvSGwV4^#@lySFF`TWkX>pooPT!XQaxvTfE(>H7`R;S>IXQVKJ;19CJZxuaQ(B=C%*tr`7&(uy67i- zms#|}c;su@1zs?mL56$WPFSSUVyKbPdPY3g7tdu0lY!r;tYN4#+DW$cEIR|Sev|Q=h+kEiHT(bla{i&aZ{Pps5Y&em%HKTu*MIxFzx&<4{_COR z_7KA_K6`ffn?Lp;&8FNRKIGZ6KmPff+jrmGyu11O!)HBvHb?yB;h%?iZr{KC_T7uy z-+bKgx#u^}-hBJ|^~Z+0!|(p)+2_Cg$AA0Xq2;j6e|-D$`HP#^uRna$PXoJJM7ir3V#3U?VBGq9zOf_%~yAS{LQne zPj;{4yA6N$_CMa=Ykq$?Q{LRYdhvYT-|z2#)@R3`avy&C*MGSA;@j6Z@BZcOfBfKc z9&`G>c>Ct=aErNp_uaXBOxOtTK1k@NlEIBqZAzW(;>H|IZefA`_<=r;Y}_VoPu zZ~yThpa1S3fA>$n`~4sO_2V`4W83;&^oKq84{u)H{^utgz(eHUod+Lc{D(J(XM@{! zk2%c${O;A)H}C%Zf8750r;hcZ;UOD8ckPbZ5r@|2uU?+#ME zH}RN}&g-`?ZdQF$J!Y`;`ml+2-@aV!Q@bBC+%~$K*%43E)U){c4|FgRvGvfK~ z70GHh@tDERcboY9{fnE|>)p^}hCFZPH3_TT!ea(JUmjXtzq)(1;>|o}(DUm3+trQ- z{JSB~$K&zP@|){NoG9UwrrHe(G&#RK#&_L*L!JdHMG1=kITCU;gI9yOE!J z@x1RJKKO?pD{7*Np zzrDTxfqy*$FK*t?Y5LXIufCt9{P6R?c~-k4|KhuOu=bOG?%D2FyZVt&dwF|zIANau z?X&iW!Tzn+aR2q~+t;@@Z|?oXC!F7Z_x+a4uTOvC58Z#?_qTrHe}Db<=I$r@|8V=C zcMpy8!)Jf~;kx>Xzi`-u+dsVe>kHrqTofb+|MCyN|NDFY{y%^J+uwiR@cWw|{Nv3Ju37vhfAR1~zyI&= z|MQo>|F?hq$M66C4?q3QufM;NfB1)fIWOk<<&W%}ceh_34#o3tzdHxMOKiv9g5TYK z^ZMp6W_;KU{N3##m%f?TA03YQ{onfd!9VhgcZWO5^RHjM`FI2V?#Mp>k5_kpeEuJQ ze03PyeRIR*OuJwG*lqaR_n&q)ebV};HdRa~TL09hY|~@cKh4QF%G$R6XMyrf5yarj!hY)E@nrk( z8kdf(=n5QR4ceI&j}5xV&$15fM<-7nv?I=aoer{4?cAHW^fI*--GvvnIXO>9v4o-@v6;0-k*XmtO~X z_Dwv~D!?Cq?L6deINLhGwe5#NRuZl#kDX^p;<_H923fPXrnBZ8iyv2Z(u}Y=a!seq zvdb*jw5J}qUbF43AGu!hr$5K0DNnpSvwH3xwmkF8o7eAuY?aQ#mTG-{bNAwp&tJd& z>eY+;zj5>C$JT_sx&3P1`lAEGY4Ia#!fxNZc>D6h+L&)%z4-SZq?eDYM8Ep&@Bgpg z{>N{B``iEi``wE-zx(Y!e)V7e%a8oQ^DQwt9J~zQpM+TY?z6m3u*9gMP1_QqZHdv( zAGvLb(YD0sjQe3*VsuSIup{=xb{ewujITQTk2gkJU?UZYd>Q~vC*Y4CL+{2a}efjPG{GWe*{@tsd zn|JT$#VW`C!TIM8f9?k^+}(#43!lDP>bp<6LJaQQ+8cM|LMGK&*Lbyzzkc=IW7l{c zK5KX6UE-%NVE0{aYrM8KUfUY4qYmIgYrJ0l;JdrGHC~TfIlK*JF0r6|M4m%x-vctGphwv$j=U+bXZYH*j0!wWd{GkDAE2s&!tE+Y?W8rPpJo zRJWC0Pj#i&W6qjwrPq^O>GhaX=4q|;dd!}B-rF(yQApv!aK!zfNr)vu{s58h`Wc*Wdj4SKl7Cd;ic4-&P21D}*k-LTJ8{e6ro- z-u8R1$$DDs)_o-%^LRUNc#h5~?>)I#zPGmX zp1JZZKWb^rJSg5p_^xFhPWG5;ns$GEAHLR2KV~HNVbkj{gt7fRXH&bd%G@pahW`|s ze(V6w4rlwcn__+-@;-ST?s+TU^kar__gA++ufx;e;Y#0UH8($;27TqZ{ivlZ*a?TR z`)lgf(0q$16@Ns7yGY(#J{emAg>RfUCLL4=uTygzE95a?&cl|;fGX#7J^$T&#IPgi- zFT^n;#jSqf-Yz?4z5@9oQNMg6v-{$~b}N9@dPMg6v-ep^w$t*Bq~hHoqCw-xo< zih6AMwxWJpQNOLIU-hPME9$ou^&j`bwxa&owxWJpQNOLI-&WKQ@z!x$QNOLIzqg&+ 
zD)Areuww?UPhge!F+ZFRZ`zRpQ65l;2i~f49eu87^M=D)D0mf!iwa@Aub{Z@#xx z;ve7iKXjFNHLuJEDEISS@FzdG_O13OUN*jOImLsN`_l{ZU*Ful_~Y}}Z@+r=;{I>k zy!rEw{K}i#ujZ{kI3{yn#r|&` zx0SD@zuA_M|J?qYyN+EUb#+U~kD0>QmXIGisdvQAy!IvJ$1c9Qx+UbtF1@n+_wiTXPytxPk(Uio1J($b8nuzZ8`Hv zmNR$v{dA(`%!kj4euCx9y~<--&b%#W{@EkefRwBo9A!desYk1$=t8reDm$y`%g^ZfBEIN-`+jox4*vm&*v|1 zzd78Ce)#i$Jp*6dyg&T-w{Kp*`uea#zOPaE)6MH|KfJ(x{rV?9_{-bFx$*Ay{ZD=N z;re-TxEuYA4}bqZ-rfDghrhnP`SP%{U;gyRKPdE9Z=S#U`kQYL=fIy2JLko_+xesJ z?a$q>c0cjU?{B{T=JoCS4F!Jp?oOHJP@!$D>-+u9v$L+&WJ9gK+dH(g=cef9S!tbZ|-}j^4pPen+D*PUJ9{v4R;kQ-zJ^SI6v{m?R6@Dwf zcWxDaTZP}>-teu$Z>#XzD*V>F>05>0&u{ux;kQ-zZ54i7h2K`;w^jH(trcTih2K`; zcW*nl3cnxhu&u)H!*19r{I&|e?>BI(@VnhA{6^ozt-|lSuC2oFuX|#v@Y^c<9#XzD*Oi7Q^yQW$FCmw@tyU_AN9}gZePB7ad-PtqB`jn{oR=Pp*19JSAQw> z_}LY}_i9MyKl{_gDb1(diEBvOuDAW)t|56?dF1(O7w1LM-TWjAwLg5;uEz-$YPX$F z+gGt{3$=g#$obzmY%5=<9bf;$TxIs)(cK9aYX9@QS6|<}`}5B%vVL?);g*IkoQCZY zS@^^4z4i~O|N4i2`2BzT#k`;2-`*WQ zyS8_``SL}ZZ!)hA+wqV8xO;x{;`Qyz=ZB+p*kdoB|IzxGzxc82HvK9i4uAfzwYP8n^y=N)H;4P%hqAB39qn+>e114FKHTn}eQ~?5 z*!Ak+FY~777q>TAFK?Pz_nYqRi!Wb%VK2XU`7yZteu^(&mVK5xfB9K`Vfop9ns%S% z3-33++gshthyUH}m$&b3-y9ydAMy9z-G2AT{_#M)_~zTcoCt@*F_*L+hUh=II1l~L z-~aJuF2a0fyT85i_oa@BaDUp8bEG`LCwkfBWzMbCF;BU{o9~#cvK9Gat9VE^`0okAHsuw|c(5{rc^@ zKmTPL@7^9BTAzPxduDCF%iUoI@8@0eA8+6N`|Z2uACBd8a}zg}pG~{oKdam6=ChtJ z?6WVvsOGn~-d;BQ#r%UW?TdDMQ!fsOI;YQS*YmT(G5YeeFK(J!x0`B*XTxt^eRKQz zaH!`X{&Gc3FJ9h!aoh5?x5N0aeYyMci~Y;)U)=haUQ^%iZ+2hqeE;%`FTc30-QkLv zCi^mOwb@#}756|NJli-@pI2zx&;H z7tP^z`|))D=DYj-;llgt`Txhm?dkU2_g4c30-tF@{7{klgX`zBXTRM2@&~v3U)sHY z`MrO|y?^Dsf7QKzwR``X?)__j?_d4?Uq>+aqp|zp*!_6yen56VBD)`w-H*xc2W9u8 z^7~Oa_R;;Q{C-q^KPtZ;mEVub??>hLqw@Pv#r>$_epG=&cR#AQA649sD(*)W_oIsY zQN{hJ@_tl#KdQVRRpN}gA64FuD(^>?_oK@DQRV%p>V8ypKdQPPRo#!OaH8Ljs_sWs z_oJ%&QPusZ+Wn~7{ixdgsM`Ig+Wn{+uD|PKCH{A6Nw|&EX-*DqM-1!Z+ ze#5=rf{7CG;r4I1{~Hp3Ap;mvfFTDMl7Jx#7}9_xGdbczCNQJ|LoP5R14A}2qys}f zFeC&+MzG>g1Mwj#7_x#PEg15GAu$*-gCR8-a)Ti`SmRg-@gYAL5`-Z`7*d2GM;MZX zAxjw2gdtDZ#K9WkL#i<33PZ9mWD7&OFysqE!Z2hEL&~r{N0W#TS;LSv40*$lI1HJ? zkU9*x!;m}-*~2=AyBzOgz>ECh$RCdU;m9A3{NczSj{M=sACCOt#sN9vL;i5&4@drR z|7!V(y8Tin^rv^SY@VS8x4#*#l{NczS9(<%he8?Y;{NczS zj{M=sACCOt$RCdU;m9AJeC9%Y$RCdU;m9A3{NczSj{M=sACCOt$RA#OC`5e7ACCOt z$RCdU;m9A3{NczSj{M=sAKv)XiujN}9QnhMKOFhPkv|;y!;wE6`NNSveBxs@;zRy$ zn1oBY!yZha-PD@`odTIP!;gPE2rS z0#g&nAA$T4$RC0H5y&5b{1M0>f&3B3A7Pw2L43#`f&3B3AA$T4$RC0H5y&5b{1M0> z;hft+e8?Yx{1M0>f&3B3AA$T4oO$BZ6X%{}@(J;A`ib*ToPgpC6sMq&KLYt9kUs+X zBa*XYh!6QAkUs+XBalA=`6G}&0{J75KLYt9iqm(95BVdIKLYt9kUs+XBalA=`6G}& z0{J75KLYt9kUs+XBalA=`6G}&0{J75KLYt9kUs+XBalA=`6G}&0{J75KLYt9kUs+X zBalA=`6G}&0{J75KLYt9kUs+XBalA=`6G}&0{J75KLYt9kUtXnBauH6`6H1(68R&M zKN9&Pkv|goBauH6`6H1(68R&MKN9&Pkv|goBauH6`6H1(68R&MKN9&Pkv|goBauH6 z`6H1(68R&MKN9&Pkv|goBauH6`6H1(68R&MKN9&P`3{3GG58jPuQB932IAwJ48F?X zy9~a}K>kSNk3{}RCKehWycxKN|8!L;h&U9}W4VA%8UFkB0owkUtvoM??N-$R7>)qalAZ zCKehWycxKN|8! 
zL;h&U9}W4VA%8UFkB0owxUPx|tGKd?ORKoHii@jcbrs^{`YJB4LjGvT9}W4VA%8UF zkB0owkUtvoM??N-$R7>)qalAZCKehWycxKPKdl3Hf6}{+N(ICghI^`C~%LjIVLKPKdl3Hf6}{+N(ICghI^`C~%LjIVL zKPKdl3Hf6}{+N(ICghI^`C~%LjIVLKPKdl3Hf6} z{+N(ICghI^`C~%LjIVLKPKdl3Hf6}{+N(ICghKa zD|ER;muqynNSCX0xlEVqbY-C~;^R_XLjIVLKPKdl3Hf6} z{+N(ICghI^`C~%dpe{|%Jj{MP)KRWV9NB-!@A07FlBY$+{kBdpe{|%Jj{MP)KRWV9 zNB-!@A07FlBY$+{kBdpe{|%Jj{MP)KRWV9NB-!@A07FlBY$+{kB;=&!P3wJCp+_AoJ#{$D0D-3rm zG2F4naK|FU9jgp?EHm7(&Tz*?r_J#!yPLRcPu^JvG#Ds;=>)Q4|gm-+_C;}#{$G1D-d@q zLENzhamOOW9jg#`EJNI}4spjq#2qUUcPvHRu@-U1V#FP*5qB&{+_4^U$AZKiD-w4s zN!+m}amS*>9jg*|EKA(6E^)`g#2qUWcPvfZu{LqX;=~=R6L&06+_64!#{$J2D-?Gu zQQR>{Ns+r$xl5V5)VWKcyHvVMsk_v=OR>9FyKA{SBb9=8sd$%?cd2=oqIao!m$G-} zC@FlG%6BP!m)dtJewXTZDSwyxcPW6E3V11jml}8}f|n|IDT9|fcqxRJN_Z)Sms)r! zhL>u1DTkMOcqxdNig+oBmzsDfikGT*DT|l7cqxpR%6KV_m)dwKj+g3qDUX->cqx#V z3VA7!ml}B~l9wuZDU+8vc`1~aN_i=jms)u#mX~UIDVLXec`2Beig_uSmzsGgnwP41 zDVvwNc`2Ni%6TcBm)dzLo|o!*DW8}6c`2Zm3VJD_ml}F0qL(UqDWhi{y|IwqSV?az zr8m~n8;j|U)%3=4dSgAkv7p{qQEx1%H`de}i|UP4^~SP#V_m(mu-;f%Z!E1h*47(~ z>y6d*#`1b&eZ8^3-dJI8EU`D%*c*%NjaBx>GJ9j4y|K{VSZQx8wKvw<8;k9Y)%M16 zdt<%5vEbfVac?ZSH`d%6i|&n8_r|h&W8J;6@ZMN?Z!Enx*4`V7?~T>>#`1e({k^dO z-&lcfEWtO{;2VqZjaB%@GJInlzOfMBScz{e#W&XC8;kLc)%eD8d}BSnu^``Ak#8)? zH`e4Ei}H*5(_F^NrQ{#`1h)eZH|k-&mnSgCI;)i>7a8;kXg)%wPAePg}8v0&d=v2QHdH`eSMi}sCG`^K_; zW8J>7aNk(DZ!Fz6*6tgN_l?#2#`1k*{l2k)-&nzKEa5lS@EeQxjaB@{GJaznzp;?t zSjlfJC>V_mEYSm|#p^*7e~8;kvo z)&9nEe`CGBvEbiW@oy~oH`e?ci~fyO|HiU^W8J^8@ZVVZZ!G;c*8Urd|BcoE#`1q- z{l5_aFbV)h0>Eeh7!d%Y0$^kSj1GVi0x(JdMhd`a0T?j=qXuB)0E`}h5d<)b07eqP zXaX2f0HX?EWC4sWfDr~T$^b?hz-R*)aR8$ZVB`UeK7bJjFbV-iBEV<_7?A*@5@2Kk zj81?N3NT6mMk>H)1sJgaqZVM~0*qdO5ezVj0Y);wXa*S30HYdUWCM(DfDsNb$^k|? zz-R{;@c^S9VB`aget;1WFbV=jLcnMU7!d)ZB4A_$jE;a25->^vMoPeF2^cW}qb6YF z1dN`55fm_r0!C86XbKon0i!BlWCe__fDslj$^u4Oz-S8?aRH+)VB`gizJL)JFbV@k zV!&t&7?A;^GGJr|jLv`&8Zb%&Mry!l4H&Tjqc&jV28`Z-5gagz14eSdXbu?B0i!x# zWCx7yfDs-r$^%Avz-SK`@d2YgVB`mk{(uo6FbV`lg1~4H7!d-aLSSSFj1GYjA}~q> zMvA~_5g0K7qeful2#g+q5hO5*1V)m;Xc8Dv0;5V`WC@Hefe|J!$^=H5z-SW~aRQ@G zVB`smK7kP^FbV}mqQGbr7?A>_Qeb2Xj81_ODlkd~MykMQ6&SGsqgG(#3XEQX5iBr@ z1xB*KXcidJ0;5`BWDAULfe|h+$^}Ncz-Sj3@dBe>VB`yoet{7%FbW1n!oX-47!d=b zVqjzpjE;d3GB8R8M#{iw85l7Gqh?^_42+(E5i~H021e4rXc`z%1EXqSWDSh2fe|(^ z$_7T-z-Sv7aRZ}nVB`&qzJU=qFbW4o;=pJe7?A^`a$sZ*jLv}(IxtEHM(V(59T>3# zqjq5A4vgM`5j-%82S)P1XdW2R1EYFiWDkt)fe}71$_GaJz-S*B@dKlNVB`;s{(%ud zFbW7p0>Nk?7!d@cf?#A2j1GbkLNH1QMhd}bAs8_PqlRGQ5R4vz5kxSG2u2dYXd)O< z1fz;zWD$%mf)PeA$_Pdp!Du5GaRj4|VB`^uK7tWQFbWAqBEe`R7?A{{l3-*Kj81|P zN-#K*B^a>;qn2Rg5{zDg5lk?O2}Ux(XeJoZ1f!Z@WD|^Tf)P$I$_Yj~!DuHK z@dTruVB`~weu5EDFbWDrLcwS#7!d`dqF`hcjE;g4QZPyiMoPhGDHt&Yqo!cw6pWsN z5mYdW3Pw`FXet;{1*589WEG6Af)Q3Q$_hqW!DuTOaRsBUVB{5yzJd{0FbWGsV!>!E z7?A~|vS4HujLw1)S};lrMry%mEf}!{qqbn=7L4A45nM2e3r2FmXf7Dh1*5uPWEYI? 
zf)QRY$_qw%!DufS@dcy4VB{B!{(=!;FbWJtg28Ao7!d}e!eC?=j1GelVlYY!MvB2` zF&HrhqsCz57>pi+5o9om3`UZ{Xfha42BXSgWEqSugAryh$_z%D!DurWaR#H#VB{H$ zK7$cxFbWMuqQPi17?B2}(qLp7j820QYA{L-MykPRH5jo5qt;;L8jN0p5o|Du4MwuT zXf_zp2BX?wWE+fbgAr~p$_+-k!Du%a@dl&bVB{N&euEKkFbWPv!og@b7!e1f;$UPP zjE;j5axh8`M#{lxIT$eqqvl}b9E_fW5p*z$4o1?!XgU~C2czm>WF3sIgAsNx$__@_ z!Du@eaR;OBVB{T)zJn2XFbWSw;=yP<7?B5~@?c~hjLw4*dN4{4M(V+6Js7bEqxN9r z9*o|D5qvO;4@UCAXg(Ox2c!C6WFL&~gAsl($`3~R!Dv4i@du;+VB{Z+{(}*KFbWVx z0>WrO7!e4g0%2qzj1Ghmf-p)DMhe1cK^QRzqXuE*AdDV_5ri;`5JnQhXhIlK2%`#N zWFd?$gb{`?$`D2x!e~PnaR{RhVdNoN+MHsONqZVQ0B8*;y5sWa35k@k?Xhs;(2%{QdWFw4jgb|J~$`M97!e~br@d%?H zVdNu>euNQ_FbWbzLc(ZB7!e7hB4K1CjE;m6k}ygVMoPkHNfBIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9 zOaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk z&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp z=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzq zg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)> zBIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4 zCW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r% zXCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tP zbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-) zL1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0` z5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE2 z6G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YX zGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TC zIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^Y zpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e z2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8Bi ziJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(H znFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6 zor$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH z(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK z1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q% zM9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9 zOaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk z&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp z=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzq zg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)> zBIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4 zCW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r% zXCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tP zbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-) zL1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0` z5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE2 z6G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YX zGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TC zIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^Y zpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e z2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8Bi ziJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(H znFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6 zor$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH z(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK z1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q% zM9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9 
zOaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk z&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp z=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzq zg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)> zBIry6or$0`5p*Vk&P33e2s#r%XCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4 zCW6jH(3uE26G3Mp=u8BiiJ&tPbS8q%M9`TCIuk)>BIry6or$0`5p*Vk&P33e2s#r% zXCmlK1f7YXGZAzqg3d(HnFu-)L1!Z9Oaz^YpfeG4CW6jH(3uE26G3Mp=u8BiiJ&tP zbS8q%MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVl zNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~ zk#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs) z6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(* zGm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^ zIul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF& zq%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39g zNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@ ziKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZ znMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&d zor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX z(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoL zB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{ zMADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMI zOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K z&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x z=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&A zlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7} zBI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)Cp zCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZq zXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`; zbS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVl zNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~ zk#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs) z6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(* zGm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^ zIul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF& zq%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39g zNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@ ziKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZ znMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&d zor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX z(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoL zB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{ zMADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMI zOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K z&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x z=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&A zlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7} zBI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)Cp zCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZq zXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`; zbS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVl zNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~ zk#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs) z6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(* 
zGm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^ zIul7}BI!&dor$C~k#r`K&P39gNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF& zq%)CpCX&uX(wRs)6G>+x=}aV@iKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P39g zNIDZqXCmoLB%O(*Gm&&AlFmfZnMgVlNoOMIOeCF&q%)CpCX&uX(wRs)6G>+x=}aV@ ziKH`;bS9F{MADf^Iul7}BI!&dor$C~k#r`K&P36fC^{2GXQJp#6rG8pGf{LViq1sQ znJ79FMQ5VuOcb4oqBBu+CW_8P(U~YZ6GdmD=u8xyiJ~)6bS8?CMG%)6P<~P&csA#VxluK(V3X&OiXkpCOQ)nor#Ig#6)LeqBAkknV9HIOmrqD zIujF}iHXj{L}y~6GcnPbnCMJQbS5S`6BC_@iO$4CXJVo=G0~Zr=uAv>CMG%)6P<~P z&csA#VxluK(V3X&OiXkpCOQ)nor#Ig#6)LeqBAkknV9HIOmrqDIujF}iHXj{L}y~6 zGcnPbnCMJQbS5S`6BC_@iO$4CXJVo=G0~Zr=uAv>CMG%)6P<~P&csA#VxluK(V3X& zOiXkpCOQ)nor#Ig#6)LeqBAkknV9HIOmrqDIujF}iHXj{L}y~6GcnPbnCMJQbS5S` z6BC_@iO$4CXJVo=G0~Zr=uAv>CMG%)6P<~P&csA#VxluK(V3X&OiXkpCOQ)nor#Ig z#6)LeqBAkknV9HIOmrqDIujF}iHXj{L}y~6GcnPbnCMJQbS5S`6BC_@iO$4CXJVo= zG0~Zr=uAv>CMG%)6P<~P&csA#VxluK(V3X&OiXkpCOQ)nor#Ig#6)LeqBAkknV9HI zOmrqDIujF}iHXj{L}y~6GcnPbnCMJQbS5S`6BC_@iO$4CXJVo=G0~Zr=uAv>CMG%) z6P<~P&csA#VxluK(V3X&OiXkpCOQ)nor#Ig#6)LeqBAkknV9HIOmrqDIujF}iHXj{ z^aE$&v%~-X<@W2hcel^}^y=;F+cz(6fAFIt_x#tJySvx7pZ)&rvloB7dGponv$tP9 zySsh&?$zDfcYhu#(slMce*doL2(2g5<$IRdSI7R<6GZpw?cMud|MR;;_xro|kKLdj z`>=n#dv|kp`_-!#&tBgC>Gt*8Z@#{Lb9d4`@w0#7N4N9SI~G4FKmXYD?B=VRS8v|m zJ$v!$Pp@9Tx_R^dA?L;k_sqIFe(s!k@HzGH)BPH5UL*IMb&rgD!`OP(beUXkEIRh+ zX6${p{CPWa&r{utqiy%tcY@`6#=aMgcAh=evt;C+?U-zL!*R}~Q}5wN&thEeke`2& zYoUWZgk#qU%l?+t!T{>8D==E84cL=d80U>)C|G zBi(woo^_q&qmOm3<>!9(2z=TU*_tNQK0UFl{$Q@}b<)|^ppMzG4Jl6W{Jeb3ThG>W zg*_v@HX1M&u6Dd|EkCz(j=1uyeZEo5kYlic2Nt7Wh$pK?TzpS&J!|b5d%o(a?w_q^ z>)CqN+%x)^V*9zv&!LazYn*Zx@!C5EcnBZ&LF@W&gBLm~YdpjwoE8_l$t}L$ww|qL zy*=Br^wx8|Lxdr6+Q2>A{yF=7bI2#Btz$*cc%Of4`bi7&Y`?zzQ`;&owMPf}$isAP zJ;P<3rSAe)zIzw%x%6`L<+ph2c}hK_KU!@+x6YqCYmOOn+&^T;1&`Ny%8oAdCg@xw z*af_~VV`=pp3{vr*R#lB+-J7!iD~N@VWOW7_r3q0IVar?RT-G$BxJdAgS zF2BE5l+%WMMqkAQ!FadulRl&ld2AlAArGG$`21tjaNjL>%@1+4R#(NA~Z$zX2+~xMzIf(TE9`~1f zBpU1u&l$e>YP8YA2xrIg>0#>`<7V}VKerml;sW2zzV!@u)-NCF*0c2teFs1BoqqJK zS?_TeZFqX2>E#g?^^Nt0Z;>aPHB_ZtZguQPFOygHV1Ci1wP(Bx2U#M#_>{xevpuJ5 zdt~d`dY)v@=ub`KJ*};@XMqKWBd@1D=>zi;VV*HVU;HKSSnn*yI){e4VT~}Mv&e03 z`Fr@2>DjXWFz4wSrxdQ?aBqikr9IopxAi>bo~QL>f8AI6GLJ&bK9wx;s&DL*{F2Xn z>$wIWS5CXgc3$AiMi#$kt+Z$9`{eR_V6@CNSUjU`(blu|Y&{R}8DYpb`m@@2YjCgo zE_{7i>imVMqiykuA{}h0=xQcNp5{x=?G}Cg$9Uiy%n+yS*v00jO176(aie5Jy)?umQIUX&x2C=Wy-$B{EocUmhp}KTsn4i>-nqeNekjuZy4i#L@c)I9QjMm>cC}zJj%D7h_5nKg+hB5qhq{hu(U&p22&@zJ0GJX^c0b zU;f6p24#Km669;?c<`jeWBW(AqDF{e>)Cpqat7Y^$ksE;5q?6oJnNkdU)7L(%t7DE zI<9VHx7aB>-ebV|o*geO!(aB@&3LaEN4VtHlYf`**^a{?J!^ccv7Q6Gt+5#K7(e09 zUdy6_M~r92e2lon6UhK`5X+CKAEympe%062Es`1G4mjFA8Rq7=^=v(B9rkZMThF@E z)jE5&J#rmAqaDZVn+*9hvtg`veG5G5jrFX%{8(#c9anG*8tsNQ+F!leTlBFr1{=R~ z6+O$Gg`a49zZkP) zHJ9LU7x8NL+z402c38Kb?bx5C=cq^G<-^>1*4lFgt!L}m&i}1vWyk&jgPF@1aAOVn#+K99bBeC>wa=9e z`M2BPB9kg-e0g!EUz%iw6OfToXxruMJST9-q6QVHMcv!1rPD^o3Ql^aI4sQww~*}<8Rqyyq;?~CgVA^UZE#-9W_|3u1zC&RZI?2Al(Y!mqxOpu?lv&YVL%lBOO)@?l_^jsi4 zj&`*?>8!lA`qx4K`g-o>SFj1I?0KsDYJ|t>tDHQzu6G01Gw#@K8(Ys}!?x{oSyRt= zd1?85GTKdY?3bb=jILJqbotO1B13#P)2ha(4?pwIIO{UR_78ahtn-;->7ch|c=!>kThDlB#i@Vpf;HJ=mw#V!8Ps{IS4JJs4R^y^dKmje#@B=0IWKZo zzLqs&7ul(oUE_6aJwr@K47RN+N)Cp?p6_}_$n8JgGtc&a*L?TvkelAT|MK?Tu*ECO?4nT&jPV}Q z)-=f9UR8|IGumFf!gwzAJp`*Mdfcx40nf(eH*U1U{Zvn~t!J%0+ZlU_2aHi3Kpxby zylOwdPS|=beM>sB-f+0;#6@(f(VzFPZ~Q*YEq%xmj%AMHXy?dDkJ?F}&jwpI615ypShaiX^(Th9m;IM+9NxVl3$!k~3MTeN(n+r@UwAb!EcVxt}Hr#WK0hOImK zZFYgD!E;cpCGTK4iz*kHa{i#^r99r+EeN;8E59ld{Xb1Ub!L9$*)aa3YUAcG!9z z*7NztrUyDO>p5^^58xNR>_)t^jodTZX+Offd9i2Jv1)%tJ2gh`SsBVP+M#_)r{pJZ z>$y|f)#c8G_V7X{=hEvOx1Q@Cl3&Ap_mbb6FxvilPzliL&Wh3Ec{)97z5iYJ{@Hr2 
zeb29F>&9QXJ95wEp9NQz`mg)q8Eubjdv5Ew{+_j74QqOlzCAds`ApmP&w~eN%Y-`0 zD16B>rOWQoa}*UkbiVSk1r8(keENod{;}!g-!gT9OW_gK77x0meabH_^xYZP@~U&4 zJx_HUvwY9R%RRPp;K}x^spRXN+jsqR(zPGyE7|hXuOqzL`mUvGsu%AW;e_2r>x&)M z@h4u!KB-6egzd@fX>Iqm4ddSzM(%mK_pEaj0-WaK#?QsqShRQTv0muOYo+(;$G%GY z{NqRd^y{L5ppp}uYGZwO!Nu==r@7gm%VZzmS^C5w9Ce8eu-Om4$qW$u03ociXLxI1 zumRwDQo)jR@(6?1(LQ^vSM=D?J*6vpOLFV&xxgdTDDyAtxu8}(oM zKPNBz_a9!YcqJW=eFc5=1$F)W-SU^;72HGzD^0kzp5d;URrYLSu2J^mqyFluLFT8r z;p^Gmqg_Rh9;#jUt{?A_ecRuk@^erAQSJ$MobqAd*z@i4kDqxyt7lzrr>{F@U(PDJ z*x|p1!?N{^-E-aN*lH)j2K%d*>RH=jgVEwy&(@9bUUfUJr{B8A4!TeIXffy$&sasW zBg_nZ{JCQ>!RxA*y|SL+7={6!@IU!%S`eUE(JP;|{L&M@9Nv$w-FN(vtJA^YU3hR-e?ul3AbmtOv$TsumE z_wqdp4Qa-G9v^aqwDqj`lo{@!Z?t>MS+A+B&fsEeQWvCe__2A+s z+QV*~S9@ZttG4I!k4?uZ_-kI8^>lg`ndcf|6tPXuUFxa(+ImJd{Q@!mCaQ> z{6gEzp0!w8+v>%~^elOeZTF?Mef9{Su=Q+b))U!VBct)F8hkFl5l`vbxw@XEzdYik zF`sfX*R}-qXRo1SC6;wOt)FIUVw4B05oV#reN!_y1v2I@4RD`6^IMM*MmV3cB^MZJ zo$($&%AD5-_lYb}(bKS*xKg z{$VDaE;xHLsiwoIo^G3g!jXbeq0>)3)Zjq%)HJoLOaLs^?F`AFR+q!dte^9XW@%< z@z)ir89-jJAn9sS;?O;dFIqg~xNm^nyPlJD9n~$Tx?c{r`v*I@MtJSLrU&COzkKu) zxAH1hZZ31puW9?PJ%-WFjfKLd3IGl0I_^XB!}{0{V1_N@1!wctxX-m|Xn z&0PI}`H>qi!bv~gSaXQlwEEWF1`C_!&%E&z{TDJfAPdMs~yUc^jc50rb zXM~Z*nqs)vjbVhHvYw2wt++koK0Eiw?VQ8pB?f&XWbV-}uK^|?KkYgm`~ExjuyH-x zI`YEAv)^`?<(SU1oMAmD?`RjvHjo@M$1k~@XM`uN)p`oU1$6#LoYAiH@m2$@XpFt= zczJuZyyU60qPySrkn|KTgUc;*9rep)&i{zDY+Ad(0Xgb*)@V1})AlS`HZjV2wsoYY zPYQ0}X{@bhMYo3`Qu7t;`?3G(c0P^~>Bv1pj0u;F?#OwM3w~SfGAkIDFL4|0f*z=V z@Cu6jF7z-w@R!V-=WAG2eBif%%!%)u=Qx4J{NkB&mkbZS+Ih4qVyV|ci@lDzpwV_6 z51Hq^P#!xx`njWXuCMew?J*nfrg*~Ca@sxh|8MWymfKXaFnph<(A~bW-6#6sJCEaC zj0s;H43@i?JeghDI( zf2!~M9?_KFOJ%&6%D#wYY$&6i4CDKd3TpZ)G=U*{){ewB^5%y4K{$vbKaZnvX#f3m z{fGbQ>h9v>#odKJYk_}z`HwGGlaIxz{nT%aFmgiZ38QyZo(q1?)u~54rP#lIo4oBY zd5i~18b2%c+;orkSzsiK0v&}hRrj`?QKTeik+hBP^LwuQfsH1XhA0yW62v_8MC8ec zhMX=mz3&+rMoOJ9udh!;B*K$`d*tN)o|E97#-8rW$=hwmjIyCWiYHl_CbtOkVnZB4 zC`F&+D2>>7^c=;+mYw%*zg}Km-QLdL5I?>o=xuR?nVq!P`~LJ8=Z`@i3`g@m6-1Fg z??C=y2b_s$vvF<*+BjZ#v9PB)?|r+t`Fgd|J70fx;pdaypXa@FJQyszwy|4J|2$?R ze-PZqgI6DX-3EK2?^)!B!C>&xjJNa=!Wc5wr`H=~4-e%)=>7XYFR!J+&|b#(lQb!O z|K;VklWDXf3+G)<=!T?j%_Wg?OIxh~!K&tuc~*|Gl}W*=?6ws&WyklGH2NhGyH>5bs z2IFD+ep$YpTyK52AC@oR`xiGi{?*OZr>j5iZWqSsTT)D;nod8>X8m)X41@gX(|AxM ze~+KG<=?(gHR|#(*+GSgKWJ8j+p@N73q2hVAD>S9gDfbds6zz0g9!6N*hMASN=@tA zf;KFBq(7?>u;S(s?y232+Zsc(-4gM6bzZ&9+KWt;`~2nl)Ail;x2q3Ts=~9I^z(5# zEexAmD^Dfi+>|wc-p-X4&P?0pB5zMZ36)dhuA*46bUQnymiUr%Ejr?B<0 zPFrskyiTL|*A5A^EuZ#-NgJ=zB7TVTDruh8_vO<5W?A1M_{CEGrV#d3KZ%;@$Y0Yx z)9hz@XMboYL<$Ho8D*}9#{l8Qwsl4M{YUI-lpGM{Qyk>3#wP&r6?O`?mMRXDavpXy zQV-l;i(cPAZ?-zh`n&V~qaI)f|>oSsx6W@lhSEXI+uckK*fcLrJd z;C7S{tMd?N?%%sJh~pV$<2-aXnGmxx4bdNk!NA=>A(kgC2|Z^cgczQ_eRYr&iNj$+ zOiwb(ob>~2YI$Z<#jAzoEvu(m8yNn!}vJL5QQ51P{KQ1b*p?pn~|Bd73v^?nQZ!HR4mOCP{Jh9cGoGO|s zvH+d-T1H;(b~b|eE*Vx5q!v|I8qxB#>SYoCmSuxDNbA{ly+gyI9gF=g3DyWT z;G7XYJXzpUpE)$5(0Fe+G@#IUfQQBt8V~UBa6;oDHiRZD&)vV58xY$pLu2rQYdW+9 zZ*ZuNHN#1`braPRkiw;2(*%*>Sf^$JtRYj1R)ZHNSX-~Argd0 z5F$Z{1R)ZHNDv}vZ=XSk1R;`^2Z#^}LL`fsMu-F<5`;()B0-1*Argd0+N;$55Q$QZ zni?RI@~I(0)d`WPQ^q6{az}`y{1~GWpG1NX$@a`4L~=lgBw6?snh23pgGef9c~lTd z1)UHgS(dWDbZ>vG7)gZ=b*vak1<4R2L5u`3l4im@HjJdgG=vxlVkC=kb_pY?awl<= z7)ceuI)sr_k>Thtk}6^$MuHeg%L7D=#3hWR(ib_M0!gYVail0om9JD0B{^1xBfH21iy0VUB)^ICtYjXGt9@Y%Ccc0x(Cq*^0Nf+)$>%!ze* zLzDzj5=2Rsw6s;$r-}&X8A^okxZza3e5nP|wr(Gmj)#v=r(+wp-ZyY$Ly!%5cQ&LP zNla0q7?WBvPf;T%CvJ}<$c;@II46P&CT!q_ap4iE5BYU>4S@siP<)RXX_z*e39o!` zpC=ukJ0ahOmw`({rQn|Mj18G_`=jF-2LTsl;EI>ga^?|5rDlp7w*!ZKcU}grg(4o& zjB=`sA;dF>To9=ta1b;xiQF^HNMUaVsaD30c;EmsMek~9^upzf*f*fw~o-E29Da^u8890 
z4G}Ynd3MfoJamYKTjrX5&)yB%k*SmERuD7VadI65 zTy%h32VoLOu1~cBeeH*$4zYm8K(2F$4?GHTokN@=lIu&7K@4NZ$#o9#fJm+{Dh9EO-6q#L!~`O_KGzGl zSdQenn3~hw$@R&D2DKqo$@S?23$?79TsKnC-N|+1c^*Y_9m)0WqxF6xxNxPlIuvWBf0K$=t!<3xjs_?$@LzR z>qxF6xsK#|D^~ZATt{+!DNdxZ7X*VC*CU{@A=VXX?3q~Lr%=0EUBS4ytJ5SHH+PlB zhCN%_X&M^@oF4;?4Z_log2slpJfyLA2H3k!W5bDbq_L-pLF{73X>5oIL>hap7jWep zY3$ib|6Vkku*&-{~BL425?(2O&f0>&`*QQ2M%a5Hgs)?iz#)rLQ{& zA%p4b&N0YPXV^Uk8N4yCdkiudzwRD`492g!#~?!-T<00&JwWUZGRXUo*xh50L7Ymr zfmjfS-2osL1V1AXJ2?b{q&p78Lfl~&fLM?fL?Cu97{ovx0T2r@lL*Al#DbVkr-4|A zpL80C1tZOl0Z*=-;e1T`N85DSB!j{%56&BNjo zQ|c_)`#<)b6Qz`?KChOW0;E^(yeb&cO7A?c3M%zWNI0+Jy!ysQ1G6S?ZcnV`tGLnN zZT;p`=2p{=8x8jCM;~#c0d6$tEsiAIXn-3H9G^RJqXBL-SRXiUG{B7pxY5Af&~c-| z_Rw*o0d6$FjRv^U05=-oMuX#8xW$B71}{qBMg!bvu!jb?(Ev9Z;6?-7XwVX+Am+6F2=ZpJc0SeTJdTpEh@)4n zs&zz9J@Z~5GNp>?8bD-9Ji%%PkvZo|?G7SO7D*9AMi6;x=GdCFInE*T;dL6tzxH-7 zHGs&U^JEz0Pg@}3%KI*`%)E1%p?g;u!RY*#P*kWgf)PW+?p5ahoPj|&53>yb0RRC1 E|EMrHumAu6 diff --git a/common-primitives/pipeline_runs/data_transformation.grouping_field_compose.Common/pipeline_run.yml.gz b/common-primitives/pipeline_runs/data_transformation.grouping_field_compose.Common/pipeline_run.yml.gz deleted file mode 100644 index 8f8bdf0d10a227867c5c0e7fb163e3e9d23ea222..0000000000000000000000000000000000000000 GIT binary patch literal 0 KcmV+b0RR6000031 literal 165228 zcmV+W{{#RZiwFo;D<@t618`|@Wo&6~WnXf2ZZ3IkYyiaEJ#U@ab%x}^WuysG1k=nbQaX!Y*J5+8`{$=m z=RbXVd-d)Y|Mu?N-~95;!|BuM_5A7e?VIbXhxflfzh2(fZyxR*dQ}^z^D)=m!>RTj zkN0=EyBW3H>HeI@<1y>5)~W6u_RU%==f|D8f1Jn7o2#q$4{xu2^{eA{^}7A|{PFXL z)2H{(Pq(ihPk(&6{q?uMx&8Go-hJ~=-@N#CKfk}hjH5D?tB{a@(~=zW$@kV?(%UQmyhr3kjd(g5A(PlH|^ni-W}sQYu#Lr`?gQ* zCdWJ-mv?D zefR61UAgWIZyQ_b^YX7iqe)%u|`Tz3QK0N>a{r%~~?Z?Xp^X2Py)xW;s z;~)O~`aiwpyYqL?FMs~wWBl~|>GCdL|Lp3h+>D#cCpcd|C4YQ=`H%CvZa; zIj+~u-q-WIPS<Yja>tS5JkuFnZwXq)WzJAl4a({X2WybrFhwJfpwCmI5pS`ZW@8{!P-JMRC*ME7w z`*8m5Z(sgyuH#`|KFy=|K{cN^zi)M?d$n``Kvc?UOqp48R`A&?dzw@TTU+zw?BTSu0Fkd zKEL_$RlWR3zrFhBZ{Pj(yZ-ZsAHJ_IU*?DR_g`M}$8X?2oKFwum;e5De(2wQ`}A=B z(~o~}|NQys)7Pi(Pai)2?f3rm)jIn8@kiz_kLBa?F+Oe{$BmDNkIl#K)pDxkRLiO2RB@^}Rh%kL6{m_*#i`;{ajNZ9+o`rwZKv8!wVi4^)pn}w zRGX>V&5e(TkIl#KU8wl{5W+TOIi zX?xT5rtMAJo3=M?Z`$6py=i;X_NMJk+ncsGZExD%w7qG2)ApwAP1~EcH*Ig)-n6}G zd(-x&?M>Uews&ps+TOLjYkSxBuI*jhyS8_2@7mtAy=!~d_O9(++q<@RZSUINwY_V5 z*Y>XMUE901cWv+5-nG4Jd)M}^?Oofuws&ps+TOLjYkSxBuI*jhyS8_2@7mtAy{mUu z@2=imy}NpM_3rB3)w`>ASMRRgUA?<{clGY--POCRcUSMO-d(-BdUy5i>fP15t9MuL zuHId}yLxx^?&{suyQ_Cs@2=imy}NpM_3rB3)w`>ASMRRgUA?<{clGY--POCRcUSMO z-d(-BdUy36>OItZsP|Csq25Eihk6h79_l^Rd#LwN@1fp9y@z@a^&aXy)O)D+Q17AM zL%oN35A`1EJ=A-s_fYSl-b1~IdJpv;>OItZsP|Csq25Eihk6h79_l^Rd#LwN@8NIe z!{5w@whwI|+CH>>X#3Fiq3uK4hqe!GAKE^&eQ5j8_Mz=V+lRIfZ6De`w0&s%(DtG2 zL)(Y84{aaXKD2#k`_T5G?L*s#whwI|+CH>>X#3Fiq3uK4hqe!GAKE^&eQ5j8_Mz=V z+lRIfZ6De`w0&s%(DteAQ`@JuPi>#tKDB*n`_%TS?Ni&Qwoh%J+CH^?YWvjosqItS zr?yXRpV~gPeQNvE_Nnbt+o!fqZJ*jcwS8*))b^?EQ`@JuPi>#tKDB*n`_%TS?Ni&Q zwoh%J+CH^?YWvjosqItSr?yXRpV~gPeQNtu@2TEXy{CFl^`7cI)qASOIwas`pgysoqn)r+QEIp6Wf-d#d+T@2TEXy{CFl z^`7cI)qASLy_b3~^b=x^srORv zrQS=umwGStj(SJEqux>PsCU#m>K*ltdPlvZ-cj$Ucho!T9rccSN4=xoQSYdC)H~`O z^^ST+y`$by@2GdwJL(PsCU#m>K*ltdPlvZ-cj$Ucho!T9rccSN4=xo zQSYdC)H~`O^^ST+y`$by@2GdwJL(wny8e?a}sVd$c{;9&L}dN86+A(e|zFTids`Z*AY& zzO{X8`_}fY?OWTowr_3U+P<}YYx~yrt?gUex3+I>-`c*leQW#H_O0z(+qbrFZQt6y zwS8;**7mLKTids`Z*AY&zO{X8`_}fY?OWTowr_3U+P<}YYx~yrt?gUex3+I>-`c*l zeQW#H_O0z(+qbrFZQt6ywS8;**7mLKTids`Z*AY&zO{X8`_}fY?OWTowr_3U+P<}Y zYx`F3t=?O`w|Z~&-s-*8d*|<7*UyG}Z}r~lz14fG_g3$%-dnx5dT;c;{9=CN)pDxkRLiNBQ!S@j zPPLqBIaQo0P8Fw$Q^l#`RB@^}Rh%kLwVi4^)pn}wRNJYxQ*Ec(PPLs%+YfC&wEfWb 
zL)#B+KeYYO_CwnbZ9laA(Dp;y4{blR{m}M9+YfC&wEfWbL)#B+KeYYO_CwnbZ9laA z(Dp;y4{blR{m}M9+YfC&wEfU_`_R~j#y&Lmp|KB*eQ4}MV;>s((AbB@J~Z~Bu@8-X zXzW8{9~%46*oVeGH1?sf4~>0j>_cN88vD@LhsHiM_Mx#4jeThBLt`Hr`_R~j#y&Lm zp|KB*eQ4}MV;>s((AbB@J~Z~Bu@8-XXzW8{9~%46*oVeGH1?sf4~>0j>_cN88vD># zfW`te7ND^JjRj~dKw|+K3(#1A#sV}Jps@gr1!yclV*wfq&{%-R0yGw&u>g$)Xe>Zu z0U8U?Sb)X?G!~$-0F4D`EI?xc8Vk@^fW`te7ND^JjRj~dKw|+K3(#1A#sV}Jps@gr z1!yclV*wfq&{%-R0yGw&u>g$)Xe>Zu0U8U?Sb)X?G!~$-0F4D`EI?xc8Vk@^fW`te z7ND^JjRj~dKw|+K3(#1A#sV}Jps@gr1!yclV*wfq&{%-R0yGw&u>g$)Xe>Zu0U8U? zSb)X?G!~$-0F4D`EI?xc8Vk@^fW`te7ND^JjRj~dKw|+K3(#1A#sV}Jps@gr1!ycl zV*wfq&{%-R0yGw&u>g$)Xe>Zu0U8U?Sb)X?G!~$-0F4D`EI?xc8Vk@^fW`te7ND^J zjRj~dKw|+K3(#1A#sV}Jps@gr1!yclV*wfq&{%-R0yGw&u>g$)Xe>Z4k4~lS_Mx#4 zjeThBLt`Hr`_R~j#y&Lmp|KB*eQ4}MV;>s((AbB@J~Z~Bu@8-XXzW8{9~%46*oVeG zH1?sf4~>0j>_cN88vD@LhsHiM_Mx#4jeThBLt`Hr`_R~j#y&Lmp|KB*eQ4}MV;>s( z(AbB@J~Z~Bu@8-XXzW8{9~%46*oVeGH1?sf4~+$AEI?xc8Vk@^fW`te7ND^JjRj~d zKw|+K3(#1A#sV}Jps@gr1!yclV*wfq&{%-R0yGw&u>g$)Xe>Zu0U8U?Sb)X?G!~$- z0F4D`EI?xc8Vk@^fW`te7ND^JjRj~dKw|+K3(#1A#sV}Jps@gr1!yclV*wfq&{%-R z0yGw&u>g$)Xe>Zu0U8U?Sb)X?G!~$-0F4D`EI?xc8Vk@^fW`te7ND^JjRj~dKw|+K z3(#1A#sV}Jps@gr1!yclV*wfq&{%-R0yGw&u>g$)Xe>Zu0U8U?Sb)X?G!~$-0F4D` zEI?xc8Vk@^fW`te7ND^JjRj~dKw|+K3(#1A#sV}Jps@gr1!yclV*wfq&{%-R0yGw& zu>g$)Xe>Zu0U8U?Sb)X?G!~$-0F4D`EI?xc8Vk@^fW`te7ND^JjRj~dKw|+K3(#1A z#sV}Jps@gr1!yclV*wfq&{%+89-T_t?L%W98vD@LhsHiM_Mx#4jeThBLt`Hr`_R~j z#y&Lmp|KB*eQ4}MV;>s((AbB@J~Z~Bu@8-XXzW8{9~%46*oVeGH1?sf4~>0j>_cN8 z8vD@LhsHiM_Mx#4jeThBLt`Hr`_R~j#y&Lmp|KB*eQ4}MV;>s((AbB@J~Z~Bu@8-X zXzW8{9~%46a`BamuUvfP;wu+lx%kS(S1!JC@s*3OTzuu?D;Hn6_{zmsF1~W{m5Z-j zeC6UR7hk#f%EebMzH;%Ei?3XK<>D(BU%B|o#aAxAa`BamuUvfP;wu+lx%kS(S1!JC z@s*3OTzuu?D;Hn6_{zmsF1~W{m5Z-jeC6UR7hk#f%EebMzH;%Ei?3XK<>D(BU%B|o z#aAxAa`BamuUvfP;wu+lx%kS(S1!JC@s*3OTzuu?D;Hn6_{zmsF1~W{m5Z-jeC6UR z7hk#f%EebMzH;%Ei?3XK<>D(BU%B|o#aAxAa`BamuUvfP;wu+lx%kS(S1!JC@s*3O zTzuu?D;Hn6_{zmsF1~W{m5Z-jeC6UR7hk#f>hBhxhY`xd2<2gf@-RYq7@<6jP##7o z4l!p<@ z!wBVJgz_*#c^IKQj8Gm%C=Vl)hY`xd2<2gf@-RYq7@<6jP##7o4l!p<@!wBVJgz_*#c^IKQ zj8Gm%C=Vl)hY`xd2<2gf@-RYq7@<6jP##7o4l!p=ii>!Cakt9gA9k-jPJoaQBdoqtbna7^YV^8L>C-c~odF;tN_GBJ=GLJo($DYh%Pv)^F^VpMl z?8!X#WFC7mk3E^kp3GxU=CLR9*pqqe$vpOC9(yv6J(jPJoaQB zdoqtbna7^YV^8L>C-c~odF;tN_GBJ=GLJo($DYh%Pv)^F^VpMl?8*H8u zW*%!ZkF}Y{+RS5Z=CL;OSetpQ%{uW*%!ZkF}Y{+RS5Z z=CL;OSetpQ%{6WBhl5R=5CFz!=Tas=`x+UqB zq+611NxCKJmZV#fZb`Z&>6WBhl5R=5CFz!=Tas=`x+UqBq+611NxCKJmZV#fZb`Z& z>6WBhl5R=5CFz!=Tas=`x+UqBq+611NxCKJmZV#fZb`Z&>6WBhl5R=5CFz!=Tas=` zx+UqBq+611NxCKJmZV#fZb`Z&>6WBhl5R=5CFz!=Tas=`x+UqBq+611NxCKJmZV#f zZb`Z&>6WBhl5R=5CFxeAV^?k2Ral6cGWJsYL{KL%dXmGSM9Q^cG*?C?5bUM)h@egmtD2XuG(c+?Xs(O*;Tvjs$F)~ zF1u=%UA4=u+GSVmva5F4RlDq}U3S$jyK0wRwac#BWmoO8t9IE{yX>l6cGWJsYL{KL z%dXmGSM9Q^cG*?C?5bUM)h@egmtD2XuG(c+?Xs(O*;Tvjs$F)~F1u=%UA4=u+GSVm zva5F4RlDq}U3S$jyK0wRwac#BWmoO8t9IE{yX>l6cGWJsYL{KL%dXmGSM9Q^cG*?C z?5bUM)h@egmtD2XuG(c+?Xs(O*;Tvjs$F)~F1u=%UA4=u+GSVmva5F4RlDq}U3S$j zyK0wRwac#BWmoO8t9IE{yX>l6cGWJsYL{KL%dXmGSM9Q^cG*?C?5bUM)h@egmtD2X zuG(c+?Xs(O*;Tvjs$F)~F1u=%UA4=u+GSVmva5F4RlDq}CI5LR|9L0>c_;sQC;xdT z|9L0>c_;sQC;xdT|9L0>c_;sQC;xdT|9L0>c_;sQC;xdT|9L0>c_;sQC;xdT|9L0> zc_;sQC;xdT|9L0>c_;sQC;xdT|9L0>c_;sQC;xdT|9L0>c_;sQC;xdT|9L0>c_;sQ zC;xdT|9L0>c_;sQC;xdT|9L0>c_;sQr~Y{}8<@-nCbNObY+y1Qn9K$yvw_KMU@{w+ z%mya2fyr!OG8>r81}3wC$!uUU8<@-nCbNObY+y1Qn9K$yvw_KMU@{w+%mya2fyr!O zG8>r81}3wC$!uUU8<@-nCbNObY+y1Qn9K$yvw_KMU@{w+%mya2fyr!OG8>r81}3wC z$!uUU8<@-nCbNObY+y1Qn9K$yvw_KMU@{w+%mya2fxkcbDsDTYg2||0GAfvi3MQk1 z$*5p5DwvE4CZmGMs9-WGn2ZW0qk_q(U@|J0j0z^Bg2||0GAfvi3MQk1$*5p5DwvE4 
zCZmGMs9-WGn2ZW0qk_q(U@|J0j0z^Bg2||0GAfvi3MQk1$*5p5DwvE4CZmGMs9-WG zn2ZW0qk_q(U@|J0j0z^Bg2||0GAfvi3MQk1$*5p5DwvE4CZmGMs9-WGn2ZW0qk_q( zU@|J0j0z^Bg2||0GAfvi3MQk1$*5p5DwvE4CZmGMs9-WGn2ZW0qk_q(U@|J0j0z^B zg2||0GAfvi3MQk1$*5p5DwvE4CZmGMs9-WGn2ZW0qk_q(U@|J0j0z^Bg2||0GAfwN z1}3wC$!uUU8<@-nCbNObY+y1Qn9K$yvw_KMU@{w+%mya2fyr!OG8>r81}3wC$!uUU z8<@-nCbNObY+y1Qn9K$yvw_KMU@{w+%mya2fyr!OG8>r81}3wC$!uUU8<@-nCbNOb zY+y1Qn9K$yvw_KMU@{w+%mya2fyr!OG8>r81}3wC$!uUU8<@-nCbNObY+y1Qn9K$y zvw_KMU@{w+%mya2fyr!OG8>r81}3wC$!uUU8<@-nCbNObY+y1Qn9K$yvw_KMU@{w+ z%mya2fyr!OG8>r81}3wC$!uUU8<@-nCbNObY+y1Qn9K$yvw_KMU@{w+%mya2fyr!O zG8>r81}3wC$!uUU8<@-nCbNObY+y1Qn9K$yvw_KMU@{w+%m)7cZ2md?bNT1xpO1g? zRor$)1(Q+1WK=L26--72lTpEBR4^G8OhyHhQNd(XFc}q0Mg@~m!DLi085K-M1(Q+1 zWK=L26--72lTpEBR4^G8OhyHhQNd(XFc}q0Mg@~m!DLi085K-M1(Q+1WK=L26--72 zlTpEBR4^G8OhyHhQNd(XFc}q0Mg@~m!DLi085K-M1(Q+1WK=L26--72lTpEBR4^G8 zOhyHhQNd(XFc}q0Mg@~m!DLi085K-M1(Q+1WK=L26--72lTpEBR4^G8OhyHhQNd(X zFc}q0Mg@~m!DLi085K-M1(Q+1WK=L26--72lTpEBTPfLAO171fZKY&eDcM#^ww03g zq+~rQSx-vVlalqMWIZWaPfFI4lJ%ryJt=K$01dWCkRe0ZC>+k{OU>1|*pQNoGKj8IWWKB$)w8W=K$01d zWCkRe0ZC>+k{OU>1|*pQNoGKj8IWWKB$)w8W=K$01dWCkRe0ZC>+ zk{OU>1|*pQNoGKj8IWWKB$)w8W=K$01dWCkRe0ZC>+k{OU>1|*pQ zNoGKj8IWWKB$)w8W=K$01dWCkRe0ZC>+k{OU>1|*pQNoGKj8IWWK zB$)w8W=K$01dWCkRe0ZC>+k{OU>1|*pQNy^blIXWpvC*|m*9G#S- zllpT~e@^PpN&PvgKPUC)r2d@LpOgA?Qh!eB&q@6`sXr(6=cN9e)Sr|3b5eg!>d#62 zIjKJ<_2;DioYbF_`g2l$PU_D|{W+;WC-vu~{+!gGllpT~e@^PpN&PvgKPUC)r2d@L zpOgA?Qh!eB&q@6`sXr(6=cN9e)Sr|3b5eg!>d#62IjKJ<_2;DioYbF_`g2l$PU_D| z{W+;WC-vu~{+!gGllpT~e@^PpN&PvgKPUC)r2d@LpOgA?Qh!eB&q@6`sXr(6=cN9e z)Sr|3b5eg!>d#62IjKJ<_2;DioYbF_`g2l$PU_D|{W+;WC-vu~{+!gGllpT~e@^Pp zN&PvgKPUC)r2d@LpOgA?Qh!eB&q@6`sXr(6=cN9e)Sr|3b5eg!>d#62IjKJ<_2;Di zoYbF_`g2l$PU_D|{W+;WC-vu~{+!gGllpT~e@^PpN&PvgKPUC)r2d@LpOgA?Qh!eB z&q@6`sXr(6=cN9e)Sr|3b5eg!>d#62IjKJ<_2;DioYbF_`g2l$PU_D|{W+;WC-vu~ z{+!gGllpT~e@^PpN&PvgKPUC)r2d@LpOgA?Qh!eB&q@6`sXr(6=cN9e)Sr|3b5eg! 
z>d#62IjKJ<_2;DioYbF_`g2l$PU_D|{W+;WC-vu~{+!gGllpT~e@^PpN&PvgKPUC) zr2d@LpOgA?Qh!eB&q@9H_a|S)ZKoWal%tb!bW)B^%F#*vIjKJ<_2;DioYbF_`g2l$ zPU_D|{W+;WC-vu~{+!gGllpT~e@^PpN&PvgKPUC)r2d@LpOgA?Qh!eB&q@6`sXr(6 z=cN9e)Sr|3b5eg!>d#62IjKJ<_2;DioYbF_`g2l$PU_D|{W+;WC-vu~{+!gGllpT~ ze@^PpN&PvgKPUC)r2d@LpOgA?Qh!eB&q@6`sXr(6=cN9e)Sr|3b5eg!>d#62IjKJ< z_2;DioYbF_`g2i#F6z%k{kf<=7xm|&{#?|bi~4g>e=h3JMg6&`KNt1qqW)aepNsl) zQGYJ#&qe*Ys6Q9==c4{x)Srv`b5VaT>d!^}xu`!E_2;7gT-2Y7`g2i#F6z%k{kf<= z7xm|&{#?|bi~4g>e=h3JMg6&`KNt1qqW)aepNsl)QGYJ#&qe*Ys6Q9==c4{x)Srv` zb5VaT>d!^}xu`!E_2;7gT-2Y7`g2i#F6z%k{kf<=7xm|&{#?|bi~4g>e=h3JMg6&` zKNt1qqW)aepNsl)QGYJ#&qe*Ys6Q9|(4rq&^h1k&XweTX`k_TXwCINx{m`NxTJ%GU zerVATE&8EFKeXtF7X8qoA6oQ7i+*U)4=wtkML)FYhZgCBv*4Nq*DSbZ!8Hr6S#ZsQYZhFy;F<;3EVyRDH4Cm; zaLs~i7F@I7ng!P^xMsmM3$9sk&4OzdT(jVs1=lRNX2CTJu32!+f@>CBv*4Nq*DSbZ z!8Hr6S#ZsQYZhFy;F<;3EVyRDH4Cm;aLs~i7F@I7ng!P^xMsmM3$9sk&4OzdT(jVs zh1QI=d(d_d+U`NyJ!rcJZTFz<9<<$qwtLWa58CcQ+dXKz2W|JD?H;t;A;m~YF%nXYgcKto#Yjjo5>kwW6eA(UNJudfQjCNYBO%2|NHG#pjD!>;A;m~Y zF%nXYgcKto#Yjjo5>kwW6eA(UNJudfQjCNYBO%2|NHG#pjD!>;A;m~YF%nXYgcKto z#Yjjo5>kwW6eA(UNJudfQjCNYBO%2|NHG#pjD!>;A;m~YF%nXYgcKto#Yjjo67u_# zui~~dAyQ0;6cZxFgh(+VQcQ>x6C%ZgNHHN&Oo$W{BE^JAF(Fb+h!hhd#e_&PAyQ0; z6cZxFgh(+VQcQ>x6C%ZgNHHN&Oo$W{BE^JAF(Fb+h!hhd#e_&PAyQ0;6cZxFgh(+V zQcQ>x6C%ZgNHHN&Oo$W{BE^JAF(Fb+h!hhd#e_&PAyQ0;6cZxFgh(+VQcQ>x6C%Zg zNHHN&Oo#-*7X)7rd_nL9!50Ky5PU)K1;G~tUl4pj@CCsa1YZz*LGT5^7X)9V{=?z_ z7DVs`!50Ky5PU)K1;G~tUl4pj@CCsa1YZz*LGT5^7X)7rd_nL9!50Ky5PU)K1;G~t zUl4pj@CCsa1YZz*LGT5^7X)7rd_nL9!50Ky5PU)K1;G~tUl4pj@CCsa1YZz*LGT5^ z7X)7rd_nL9!50Ky5PU)K1;G~tUl4pj@CCsa1YZz*LGT5^7X)7rd_nL9!50Ky5PU)K z1;G~tUl4pj@CCsa1YZz*LGT5^7X)7rd_nL9!50Ky5PU)K1;G~tUl4pj@CCsa1YZz* zLGT5^7X)7rd_nL9!50D_+gZVORibX9e3?!FE=#ofT|n1>0G{ zc2=;R6>Mh(+gZVORibX9e3?!FE=#ofT|n1>0G{c2=;R6>Mh( z+gZVORibX9e3?!FE=#ofT|n1>0G{c2=;R6>Mh(+gZVORibX9e3?!FE=#ofT|n1>0G{c2=;R6>Mh(+gZVORibX9e3?!FE=#ofT|n1>0G{c2=;R6>Mh(+gZVORibX9e3? 
z!FE=({!0!2w;;}DaW;#yS)9${Y!+v;IGe@UEY4eu72aCzUVsfyU94saWi^;)aaeu72aCzUVsfyU94saWi^;)aaeu72aCzUVsfyU94saWi^;)aaeu72aCzUVsfyU94saWi^;*?pUppqe=h&L{PXcozKYwOT57nZ?ME0Eb#_Q zyulK0u*4fI@diu0!4hw<#2YN}21~rb5^u1?8!Yh#OT57nZ?ME0Eb#_QyulK0u*4fI z@diu0!4hw<#2YN}21~rb5^u1?8!Yh#OT57nZ?ME0Eb#_QyulK0u*4fI@diu0!4hw< z#2YN}21~rb5^u1?8!Yh#OT57nZ?ME0Eb#_QyulK0u*4fI@diu0!4hw<#2YN}21~rb z5^u1?8!Yh#OT57nZ?ME0Eb#_QyulK0u*4fI@diu0!4hw<#2YN}21~rb5^u1?8!Yh# zOT57nZ?ME0Eb#_QyulK0u*4fI@diu0vHCAH{NI8&o5k5I&Sr5oi?dmr&EjkpXR|n) z#n~**W^p!)vss+Y;%pXYvpAc@*(}axaW;#yS)9${Y!+v;IGe@UEY4i@V!*r@FfRtoivjauz`PhRF9ytu0rO(OycjSq2F!~A^J2if7%(pe%!>i@V!*r@ zFfRtoivjauz`PhRF9ytu0rO(OycjSq2F!~A^J2if7%(pe%!>i@V!*r@FfRtoivjau zz`PhRF9ytu0rO(OycjSq2F!~A^J2if7%(pe%!>i@V!*r@F#r9@S8>~!H!tSRi+S^6 z-n^JMFXqjQdGli4yqGsH=FN+F^J3n-m^Uxx&5L>SV&1%%H!tSRi+S^6-n^JMFXqjQ zdGli4yqGsH=FN+F^J3n-m^Uxx&5L>SV&1%%H!tSRi+S^6-n^JMFXqjQdGli4yqGsH z=FN+F^J3n-m^Uxx&5L>SV&1%%H!tSRi+S^6-n^JMFXqjQdGli4yqGsH=FN+F^C0Ph zqzjTRNV*{Df}{(QE=al{>4KySk}gQPAnAgn3z9BKx*+Ld^`8d+#~+d|NV*{Df}{(Q zE=al{>4KySk}gQPAnAgn3z9BKx*+L-qzjTRNV*{Df}{(QE=al{>4KySk}gQPAnAgn z3z9BKx*+L-qzjTRNV*{Df}{(QE=al{>4KySk}gQPAnAgn3z9BKx*+L-qzjTRNV*{D zf}{(QE=al{>4KySk}gQPAnAgn3z9BKx*+L-qzjTRNV*{Df}{(QE=al{>4KySk}gQP zAnAgn3z9BKx*+L-qzjTRNV*{Df}{(QE=al{>4KySk}gQPAnAgn3z9BKx*+L-qzjTR zNV*W|c*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Oo zc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo z$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{j zBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Oo zc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo z$Rl{jBY4Ooc*rAo$Rl{jBY4Ooc*rAo$Rl{FBY3JKc&a0Ksv~%+BY3JKc&a0Ksv~%+ zBY3JKc&a0Ksv~%+BY3JKc&a0Ksv~%+BY3JKc&a0Ksv~%+BY3JKc&a0Ksv~%+BY3JK zc&a0Ksv~%+BY3JKc&a0Ksv~%+BY3JKc&a0Ksv~%+BY3JKc&a0Ksv~%+BY3JKc&a0K zsv~%+BY3JKc&a0Ksv~%+BY3JKc&a0Ksv~%+BY3JKc&a0Ks^j-3U&Zz2A&=l8kKiGX z;31FTA&=l8kKiGX;31FTA&=l8kKiGX;31FTA&=l8kKiGX;31FTA&=l8kKiGX;31FT zA&=l8kKiGX;31FTA&=l8kKiGX;31FTA&=l8kKiGX;31FTA&=l8kKiGX;31FTA&=l8 zkKiGX;31FTA&=l8kKiGX;31FTA&=l8kKiGX;31FTA&=l8kKiGX;31FTA&=l8kKiGX z;31FTA&=l8kKiGX;31FTA&=l8kKiGX;31FTA&=l8kKiGX;31FTA&=l8kK`ebHB2*KAFBxrtg#K`(*k)nZ8e@@70D2#ob{yJqX~(G@w{{%cac#%B z9rtz|+;MTo$sIR$9Nlqs$JrfscO2evdB^D;w|5-haec@69rt$};BkS+2_83i9N}?= z#~B`XcpTz!iN`4(w|E@magE109`|@0g(yNLicpFo z)S?K*C_*)gP>v$hqX-2lLPd&Dk|NZk2t_GERfxrw9cqLWPP@q9W9&2t_JFm5NZNBGjn}g(^a&icqQ|)T#)@Dnhl2P_81>s|W=v zLdA+uvLe*12t_MG)rwHIBGj!2g)2hkicq>D)UF8SDMEdUP@p1Is0bx0LXC=0q#{(Q z2xTflor+MXB2=mfr7A+LicqW~RI3Q(Dnh-AP_QCYtOzA5Ld}X$v?5fk2xTim-HK4S zB2=yjr7J@1icq{FRIdo-D?P{1Noum~kALJf;h#3EF&2xTln9g9%NB2=;nr7S`% zi%`rWRI>=>EJ8htP|zY&v12xTooU5ilIB2=~rr7c2ji%{GmRJRD_ zEkb>ZP~aj|xCkXKLXC@1Xy@kZLkogu;-$L$NNPY|1 zZz26Hdwp>V; z3;A*(VJ>9Mg_OCFGZ&KPLe^YJn+th!A#pBb&V|&ukUJNW=R)>eNS_P&b0L8)WYC2a zx{yN`lITJfT}Yz~d2}I>E@aY$RJxE$7n12hHeE=k3;A>*p)O?9g_OFGQx}rzLRMW! 
zs|$H`A+auG)`ir%kXsj$>q2&2NUsa|bs@nnWY~oiyO3iSlI%j3T}ZPFd3GVuE^OM> z{c}M;@AC}-@j((o$wDY;2qh1pBqEedgp!I-auG^0Ldixb=?Ensp(G@fjD(VsP;wGV zQbNf}C}{~LFQFtRl+1*Znox2RN^(NUPAKUKB|o7gD3lC^lA=&@6iSjp$xG zp(HevjE0iZP;wedQbWmVC}|BPuc0J1l+1>b+E8*EN^(QVZm1`IJn=*NL&<+A2@oX% zqNG5S9Eg$xQO`$ce1*nmXncpphiH6>#;0g}i{{TU8of{akPuNaB1%d`$%!aQ5hW|4 zq(zjxh>{plG9yZAM9GaP$q^+xqNGQZ{D_huQ8FY-ibTngC`l3}OQIw~luU?{3Q=+) zN-{*rhA8O}B_E{vnawAG|M9GdQ=@BJA zq9jO^42hB=QF0_ol0?apC}|QUPogABluU_|Dp7JJO0q=BmMG~GC10W>Oq7g?k}^?p zCQ8yo$(kr>6D4n=BuOqtN-{;srYPwYC7+@sRFsU0l2TEh&z1RJnGcrvVwq2t`DU4qmicO# z&zAkUTZZ@b@#Qj~F7xd&A20LuGM_K={W2dg^93`VFgvZ0+0j|Eqq}BDhs};I zn;o4tJGyOlblmLdy4lfrv!nZFM+eT1E}R{mI6Jy=Mib-z-+Eu`eY#t4bhzN?a>3E* zf}`67N5>0}t`{7gFF3kiaCE@n=z_t~34@~>21iE>j;bx@T~7(BSBz!O=;BqnidtM-7gy8XTQ9IJ#?ablBkNvVnY!-`{$l zjvE|ZH#j9b8vL$;ONl7(WQfblhz}t?g!mBRLx>L{K7{xX;zNiJAwGoo5aL6K4blhz}t?g!mBRLx>L{K7{xX;zNiJAwGoo5aL6K4blhz}t?g!mBRLx>L{ zK7{xX;zNiJAwGoo5aL6K4blhz}t?g!mBRLx>L{K7{xX;zNiJAwGoo5aL6K z4blhz}t?g!qu+Ly8Y6KBV}N;zNoLDL$n5km5s%4=FyR_>kg5iVrD1r1+5H zLy8Y6KBSK&`&#m!bIE$24<`F!vQH-aX0nea`)acIk>W>+A1QvM_>tmAiXSO{r1+8I zM~WXQex&%3;zx=fDSo8*k>W>+A1QvM_>tmAiXSO{r1+8IM~WXQex&%3;zx=fDSo8* zk>W>+A1QvM_>tmAiXSO{r1+8IM~WXQex&%3;zx=fDSo8*k>W>+A1Qvw|8Du;E&sda zf4BVamjB)Izgzxy%l~fq-!1>U<$t&Q@0S1F^1oaDcgz27`QI)7yXAkk{O^|k-SWR% z{&&m&Zu#FW|GVXXxBTyx|K0MxTmE;;|8Du;E&sdaf4BVamjB)Izgzxy%l~fq-!1>U z<$t&Q@0S1F^1oaDcgz27`QI)7yXAkk{O^|k-SWR%{&&m&Zu#9Uzq{pkxBTvw-`(=N zTYh)T?{4|sEx)_vceniRmfzj-yIX#D%kOUa-7UYn<#)II?v~%(^1EApcgyc?`Q0tQ zyXAMc{O*?D-SWFzes|07Zu#9Uzq{pkxBTvw-`(=NTYh)T?{4|sEx)_vceniRmfzj- zyIX#D%kOUa-7UYn<#)II?v~%(^1EApcgyc?`Q0tQyXAMc{O*?D-SWFzes|07Zu#9U zzq{pkxBTvw-`(=NTYh)T?{4|sEx)_vceniRmfzj-yIX#D%kOUa-7UYn<#)II?v~%( z^1EApcgyc?`Q0tQyXAMc{O*?D-SWFzes|07Zu#9Uzq{pkxBTvw-`(=NTYh)T?{4|s zEx)_}++FtJWnW$vKYV-H$CrJ5+2@yif7u6^eSz60n0U<$t&Q@0S1F^1oaDcgz27`QI)7 zyXAkk{O^|k-SWR%{&&m&Zu#FW|GVXXxBTyx|K0MxTmE;;|8Du;E&sdaf4BVamjB)I zzgzxy%l~fq-z~qp<#)II?v~%(^1EApcgyc?`Q0tQyXAMc{O*?D-SWFzes|07Zu#9U zzq{pkxBTvw-`(=NTYh)T?{4|sEx)_vceniRmfzj-yIX#D%kOUa-7UYn<#)II?v~%( z^1EApcgyc?`Q0tQyXAMc{O*?D-SWFzes|07Zu#9Uzq{pkxBTvw-`(=NTYh)T?{4|s zEx)_vceniRmfzj-yIX#D%kOUa-7UYn<#)II?v~%(^1EApcgyc?`Q0tQyXAMc{O*?D z-SWFzes|07Zu#9Uzq{pkxBTvw-`(=NTYh)T?{4|sEx)_vceniRmfzj-yIX#D%kOUa z-7UYn<#)II?v~%(^1EApcgyc?`Q0tQyXAMc{O*?D-SWFzes|07Zu#9Uzq{pkxBTvw z-`(=NTYh)T?{4|sEx)_vceniRmfzj-yIX#D%kOUa-7UYn<#)II?v~%(^1EApcgyc? z`Q0tQyXAMc{O*?D-SWFzes|07Zu#9Uzq{pkxBTvw-`(=NTUX;+XX9G_cgz27`QI)7 zyXAkk{O^|k-SWR%{&&m&Zu#FW|GVXXxBTyx|K0MxTmE;;|8Du;E&sdaf4BVamjB)I zzgzxy%l~fq-!1>U<$t&Q@0S1F^1oaDcgz27`QI)7yXAkk{O^|k-SWR%{&&m&Zu#FW z|GVXXxBTyx|K0MxTmE;;|8Du;E&sdaf4BVamjB)Izgzxy%l~fq-z~qp<#)II?v~%( z^1EApcgyc?`Q0tQyXAMc{O*?D-SWFzes|07Zu#9Uzq{pkxBTvw-`(=NTYh)T?{4|s zEx)_vceniRmfzj-yIX#D%kOUa-7UYn<#)II?v~%(^1EApcgyc?`Q0tQyXAMc{O*?D z-SWFzes|07Zu#9Uzq{pkxBTvw-`(=NTYh)T?{4|sEx)_vceniRmfzj-yIX#D%kOUa z-7UYn<#)II?v~%(^1EApcgyc?`Q0tQyXAMc{O*?D-SWFzes|07Zu#9Uzq{pkxBTvw z-`(=NTYh)T?{4|sEx)_vceniRmfzj-yIX#D%kOUa-7UYn<#)II?v~%(^1EApcgyc? 
z`Q0tQyXAMc{O*?D-SWFzes|07Zu#9Uzq{pkxBTvw-`(=NTYh)T?{4|sEx)_vceniR zmfzj-yIX#D%kOUa-7UYn<#)II?v~%(^1EApcgyc?`Q0tQyXAMc{O*?D-SWFzes|07 zZu#9Uzq{pkxBTvw-`zTc*Sdq(^1oaDcgz27`QI)7yXAkk{O^|k-SWR%{&&m&Zu#FW z|GVXXxBTyx|K0MxTmE;;|8Du;E&sdaf4BVamjB)Izgzxy%l~fq-!1>U<$t&Q@0S1F z^1oaDcgz27`QI)7yXAkk{O^|k-SWR%{&&m&Zu#FW|GVXXxBTyx|K0MxTmE;;|8Du; zE&sdaf4BVamjB)Izgzxy%l}UCHO1EyUsHTd@ioQQ6kk((P4P9w*A!nx)1^}eh3UA^z>eNXRudf(Igp5FKLzNhy+z3=ILPw#tr->3I| zdf%t_eR|)g_kDWbr}uq&->3JHuPMH!_?qHtimxfYrudrTYl^QazNYw^;%kbpDZZxo zn&NAUuPMH!_?qHtimxfYrudrTYl^QazNYw^;%kbpDZZxon&NAUuPMH!_?qHtimxfY zrudrTYl^QazNYw^;%kbpDZZxon&NAUuPMH!_?qHtimxfYrudrTYl^QazNYw^;%kbp zDZZxon&NAUuPMH!_?qHtimxfYrudrTYl^RFzN)X1>Z_#sDyhCos;`pjtEBoWslH08 zuafGkr1~nUzDlaElIp9Z`YNfuN~*7t>Z_#sDyhCos;`pjtEBoWslH08uafGkr1~nQ zzDlXDQtGRe`YNTqN~y0>>Z_FcDy6>AzDlXDQtGRe z`YNTqN~y0>>Z_FcDy6>AzDlXDQtGRe`YNTqN~y0> z>Z_FcDy6>AzDlXDQtGRe`YNTqN~y0>>Z_FcDy6>AzDlXDQtGRe`YEM;N~xbx>Zg?YDW!f&sh?8nr;9eoCpIQtGFa`YEM;N~xbx>Zg?YDW!f&sh?8nr;9eoCpIQtGFa`YEM;N~xbx>Zg?YDW!f&sh?8nr;9eoCpIQtGFa`YEM;N~xbx>Zg?YDZjtRZ4x8QeUOiS1I*X zN_~}5U!~MnDfLxKeU(yQrPNm`^;JrJl~P}&)K@9>RZ4x8QeUOiS1I*XN_~}5U!~Mn zDfLxKeU(yQrPNm`^;JrJl~P}&)K@9>RZ4x8QeUOiS1I*XN_~}5U!~MnDfLxKeU(yQ zrPNm`^;JrJl~P}&)K@9>RZ4x8QeUOiS1I*XN_~}5U!~MnDfLxKeU(yQrPNm`^;JrJ zl~P}&)K@9>RZ4x8QeUOiS1I*XN_~}5U!~MnDfLxKeU(yQrPNm`^;JrJl~P}&)K@9> zRZ4x8QeUOiS1I*XN_~}5U!~MnDfLxKeU(yQrPNm`^;JrJl~P}&)K@9>RZ4x8QeUOi zS1I*XN_~}5U!~MnDfLxKeU(yQrPNm`^;JrJl~P}&)K@9>RZ4x8QeUOiS1I*XN_~}5 zU!~MnDfLxKeU(yQrPNm`^;JrJl~P}&)K@9>RZ4x8QeUOiS1I*XN_~}5U!~MnDfLxK zeU(yQrPNm`^;JrJl~P}&)K@9>RZ4x8QeUOiS1I;Y^!by%e$vNJ`u0hmKIzLRefXsB zp7hz1zIxI}Px|IbpFHV{Cw=gw@169ylfHJ+$4>gzNuN6DODBEkr0<;cnUlV9(nn7E z#z~(z=?f=);H2-H^m&uMZqmn1`nE}*HtEYIeb}V$n)F$dzG~7(P5P!upET)pDgK%C4I1@@0IkqlD<~b$4dHENuMg|OC^1% zr0zS=?f!$V5IMh^m&oKF4D(E`nE`)7U|0( zeORRLiu75LzA9?|=-K?uv-y)}^B2$N51!56JDWduHh=AG{@B_4t+V-4XY-fN<`13C z-#ME6Xoz1%Ha=`!`~-|KTi&SogDr+Is9#M_|xR@m&xG|lf&O7hd)aW zf0Z2mC^`I1a`= zN)CUM9R4IZ{6%v3gXHk{$l=eC!(StZKSmCJiyZzGIs7GZ_(SCIcgW$-ki%aghd)9N ze}f$U1UdW#a`*$}@b|~z&yT}jABR6a4u5+b{`5Hf<#G7KK zargt`@b|;v&xgZb4~IV<4u3lw{&YC}<#71J;qZ6E;m?M{Uk!&p8V-Lm9R6fD{Kati zgW>S^!r{+_!(R)BKNb#uD;)k*IQ*q>_(S3Fcf#S%gu`D6hd&YyeIh3J!l09R4IY{6%p1gW&M@ zz~Rq-*U-gDR>J5L>8~&s>{6%m0gWmA>yy4Gz z!(a1;Kjsa8%NzcbH~b}U_(R_Ccf8@xc*9@uhCkvBf5RL8gg5*JZ}x%U+soJ+6{lR8~$WB{KanggWd4=y5Y}t!(Z!$ zKh_O@s~i4QH~gh;_(R?Bce>#jgy9v0;Sq%44TRwdgy98*;Q@r<{e$88gW>gq;qimv z?StXzgW=_a;o*bf-GkxTgW=VK;n9QP&4b~|gW<)4;lYF9y@TPogW-#|?(J4Th%;hL;V7hYf~z4Tfh8 zhF1-SM-7HI4TdKTh8GQn2Mvbz42I_nhSv;+#|(zI42Gu+hL;S6hYW^y42EY6hF1)R zM+}BH42CBRh8GNm2MmVy3x?+lhSv**#|wtH3x=l)hL;P5hYN;x3x;P4hF1%QM+=5G z3x+2Ph8GKl2MdPx3WnzjhSv&)#|nnG3Wlc&hL;M4hYE&w3WjG2hF1!PM+$~F3Wg^N zh8GHk2MUJw35MqhhSv#(#|ehF35KT$hL;J3hY5yv35I70hF1xOM+t^E35F*Lh8GEj z2MLDv2!`hfhSvy&#|VbE28O2whL;A0hX#gs28L$_hF1oLM+SyB28JgFh8G5g2L^`s z1%~GZhSvp##|4JB1%{^uhL;6~hXsar1%_t@hF1lKM+JsA1%@XDh8G2f2L*=r1cv7X zhSvm!#{`DA1cs*shL;3}hXjUq1cqk>hF1iJM+Am91coOBh8F~e2Ly)q1BT}VhSvjz z#{-791BRyqhL;0|hXaOp1BPbb2LXon0EXuPhSvaw#{h=60EVXkhL-?_ zhX96m0ETA(hF1WFM*xO50EQ<3h8F;a2LOis|6%@rSpOf!|A+1WVfufD+dkZO-US$* z1sGlh7#;-}-UJw)1Q=cf7#;)|-UAq(0~lTd7#;%{-U1k&0vKKb7#;!`-T@e%0T^BZ z7#;x_-T)Y$02p2X7#;u^_Wy_Z|6%=q82=x(|A*=SVflX;{vUS#huQyO^?w-sA2$Do z$^T*Te;E89_Wp;t|6%Qa82cZ#{)egmVd;Mu`X6@whnfFj<$oCYA2$AniT`2Ye;D{5 z_Wg%>|6$#K822Bx{fBA)VcCBe_8)frhgtt&)qfcEA2$7mN&jKde;D*1_WXxA|6$F4 z81o;t{D&$3Vab0O@*j5mhZ+B2#eW#_A2$4l3IAcie;Du|_WOtV{$ag;81Emp`-kcN zVYz=8<{x(Xhgtq%m46uJA2#`iN&aDxe;DK+_V|Z6{$Y)O7~>zd_=hR}VTpej;vaVS zhZ+81g?|{~A2#@h3I1V$e;D8&_V}}(H}z5HP=e^|>O#`1@){9!78Sjr!U@`s)LVJ3fA$sb1YhmHJU 
[GIT binary patch data omitted: base85-encoded literal blob, not human-readable]
zpXwl22I;K}wAN6EB`b&?_Wrs*EtpxM^spBy8c!{`F5iNoNu;p$|16bse_A&)1NyM; zf9QVH;mL}whqb>}omU;3VgMQH&>xz;hKS|HZC*TTg{p<}QAe*F&~fyBp5xW3_E(4H z(fcU|fx_C~>t%&g#5gcCt(xy}I1ECGv4^$4f)TQh-;QFfypP_`6Z(I3bgII+u*N@h z{|fwQ;WAWlJsgkubU-VB^9eqv!&VjGc3BH+{F_52M_5r92XgfOsiM8G_E&Ii9|v`` z|JMHM*wvgeRFOZn-anBN4psaQF<1=wi9?JQg?0aWAGh8|P4I=azwF=E`>B9UVeLP( zzwDzrF2;6HP4C}!6s|jv=lwN)P5a9ps>7tPVvCxu#Wgmxo)}^W9g2#4JNji06#_8F z>-a=AW&hNnGejLYaq6mpxQR**`EcDCVyD`oq|8y}yF^W^|DKo5%mJ zc)b}QG(S%#4fK9GRIdO|;V!KG^?pvx&x*MYbtDck7Ydp+| zA%AgdzMjMTYdmBRr~E~Agbp!~4Rw%?DSJ5OFFNI;^>Sgwn-tWd#Cl;xeH?pJxI5Kh zp6Yo22gWc(*%T2O3RF2NCq{!5jeClbC&j>!V&KU}`kKk-o-N4o6`JhyLQIW7JVzCssZkhRT3^8M?&y)%E}G$W_uo^H*s83C$lN`~T_i zmAwnut8WLi7zCy};uSYfWiLJ*>h5S)>|VsTA%>e!O!><=pzK<7{lCV0XuOAVZy!bp zb!-lGfDYx|z6@GsBr#)_8Bm-pRQyKc`(eP+_o|MFRndp-Tl z9}|BVu6|f=dA;QIj@K*0{`n97wEbiIxAxb%{U!T5nxE_seCG2X=2NX-Q{($(pb~?X z{8JYVh>>H2D@a@~qV-*9eWpaqP;S%W6+;YnUxq1-w-|AT7<7gd1YV02vBp=7Mni<* zL$vKfj8CJ<-%_Ah5_(VZ`Vx!PTJc16F#L;unb=IyCbX7JgW^S!e10U_WMZ~Pfu$q1 zNc^{1%f%nPB~S2Sju(Ib5|g@;fqhA;V3{=DngSwGpQ5v#R!h)$Ps<5dk3tmNVtDB( z4tp!Jq%|_ER3f^1Rnm0JVTb`v9g0tqh1WBzzamQcA#!i?wwpa)3|UWrfLOfVU6k!z zjQ&c1)aT3RB?h%FhPU1{zK!eOroyatIlnbZT6J|ezsnoMoZr+@-PN&Oczl?^pSp zhjG6{9e_iO1d?wZ}uHAdI{cHOTSARfgE z@e58299;|^@?T#^udq6P-64F`(SO7MaMW@B>1b9*{?Ywy zbtrG$pBQ8M*8OdDRBzqy*8OhX@6!D)b&N0FFFSBw2fjM$)$zS_zf<@7VGPjyP2JyA zhxvyAK@1*g#t7ZthtWazm%6{y{jme%9~di+u%2hAWB$Z&MU zXmrK6#SYWQKqSVXAuPtGD@HNar#yxwF(`>)>ZGw4qI7>pjAmQ+%k}qTtP%s37|FKo zck6z)?sw~c#i-Gi?sth1ZRvhj7>kk23~svLsr&sfs+oaJj8jwh%k}%0G0qHgsryac zZ|Z(kdfU1J;>|1a))mrf?P!hGBwW>+vcR=yVA!o@ZN{xy<3i(J29EA!;PqY>0@N}D zAJVOL0`@;F+`ESp^F3@%s?QdIl6y!!|ElwOJQ{|(xzH}R1CH(8>?pd@roU44yK2CC zYGOFwqGMlf4aaYh3h6F(B;3hDr5a9--(=g}oty~oq~mv!RA)CS;d+ySxSK3oxH%4w z$HuRl6tKLK;;CgN$4^wnSJcy2RN^;CMRo_N3$6+^%1mRSen)aV2dTC0^VI$wiwfr| z5LsUX$FsA+=yop5uc-5GW1&?|8*1I8<8&Jr3bo8;>+fYDX}Q5{)x9h{I+u>ixvcR% zmw}FR8JIptt?!4_%13QGuXUHLy`;83hf)P+Z?@v@<iVOpMnw&heZqocy+mGw^&S^`dK8qSka#HAqVDq*||&)5b~88YfLfK(+D{ zoH9;u#yFyCKceD4p;mk)>W&J>&ft9Us2N1v!13#BJv>_2ozD5=^d+hb209+q1*t2V zs5Ll$oJK0G(?|_n%S2KMG*M@?{-F*bRml`J3FnKg4z_ywrbZz()#3c{rDEayu@9*c z%8evd(dPWIH|LMNIe+XeYA8_+?9KUOFU}u(asGHzJfsHVLUb=tYc0+ndy4vIvaq}E zhT}Ooe>^HA9*?~`doqx+)eTne2ENy=2Gx3dIiAUd-A+oeZc%BM2gvbMA;%6Tj>_HP zcq$P4rRrjz_a3B1+=KJSR$}w1CgXUWg*01%i`_YY?9TaPcg`O*TXX!hklIbuUybv} z7B$aSFXwJd46V!gqrNuSzo$hVz=82xmxUr_O^`TRhx5leoIlo~^ujt6SYLKs*5Z6pZ-=_pwkn-!LwLjT z1~tn)tHuqddqaAI+Q&8{08c-jB|No(IrZ9bv_ZN^k98unLCV5sEN@Wj*9o=L9jT$z zhV+K*4J=2PM+zMCsKB^3q&G0-I1gFA--gfzWie}mgpZH9+X8`KASRpnC~!W&o@SMDhbi)zEs2DK{8t6GfO zu(d&2@3lcqo%c%d`l=!B&BNHRns{4QQ)6gt2ya;4z%ruM$h`c*+RYon8(12!n$&P= zMbf>2)fKg2YlG_L3$lPSJ6Al-7VYnv2!d;=IKE z{WffGpt)GDzKa`}pL-k98)z<8m#EWvMcUk;qV(E8bFsSG2$ql%|&epZ&=!(64Eu)Hok^r;MP!;`5F>ZEAN@+qBb0D$nFg^7qww) z1J`(O!_@}0vn==L)CQRgcd#wD=+%bshUE=17j83NR+e0rUaSpQ8$^JxthQSlWG>u` zzFe-ntZ=q0QC1tKH^^KBSvhNiWTKXJn`*=I2APYH6zsD0^>gsNVS9tjMW|c3)|4_A zsrt&5lDJ&in>WZ@xW}S4$XukV;8sTW2APXYnTyQj4Kf$0UYXh;bCK#Us|~Urxn)06 z4G6VC_9ImSy7D$}2yc+Nm`&!Q929L(pi+Mu%4+7R9#`(YxNHPyPZrh4sbgY3t=vL8zVxwS#|qo(B4hUpEm zA4{A~wRU4R>E%AWVR-}VUe^-Y$6D;XTP*Zhtn*ncJ6z-?U2M8q^aU47eij!}iv{M3 z4$LAY*cv)7ihmYs5f?Kki`?6b)Vt;6a9@0pi(Mg$GQ5j9_NA3ND`FSv>B{NlzIcrn zt7RAE))iu#r2Z(_0g#bw|WFbV563!6L4~0_*Ro_1e0=2VlST* zTb~TDOco|iGLcSZ6DO-~CtcCWnxM%kU~;CO^q(gCu_w!eCaV=E>3b(b>XWT5lbQL+ z`0V7uXtJ$#lFe~a#&goWpDbaV>=2cCpJb?>OxG)kI$7K}88R)%Bf|(z7B)`0&oeNk zXxf@|CMHt}VjY~+WLI2z()5V{dD8Sv(hX16?oXP&g4`FTZ_@b{_3EVQlkC9c3^W-@ zp5*wRv~`pHIO2ev4Eapjx=C9%Y3nAn<0tdGNgkx6txMXvWD{Mor#0EaleBe7TbHbN zOfF=SNsS~KP15uw{qtnyW3n+eY3q_@M#-wjq^(n_L6Y(#$#I;FN+oTb+}$LbMUn$H 
zS$mYUbxB*7tlvmRmy>NmNn4k+bxB*7Z1GQ0geD7;lC~~s>$HT(#4;t>i<`7{Nn4j} z{7WibXX?D2`@-}kBQ_ZbyC+RwQi4qDD7DikZC!FbP^&cDFH5(R37w>^OWL}m@JZ5< z6J2D|)+KFSGANdGQQrjnK`jVzE8D&a3a*xOq5T-9^`qaT6 zbmW4jFW4R>V&5QnSTM5^G<`vC`=IFy@?c3w*P)3}oG#Rr9O{w`v3!MC!9uKMAr`a{ zYg$}&*_*9b?5tmw1QXzz|JJMyqO)vbvutLC;Ps`t=2Q&!ni6%qrQ?Z^wTY3l>-xGG zZcT7&e%sLT%!(n(*e=Fcg(kA=`i5CQbUhyy4+e))UCgOuUJkKtC{9?GPb?(jfaQAl zVID7Zed-ELu_~k~R72ONn0>0cG{yOyVx_FvrwQ|Tsp~VZvSMwiJZ7e@k5#>onV0*k z%RWU!np3PW>f+C7USEdolEpZM8E;InE~R;K#rlzAiK^k?^@vrZ93>r3tT&2prE-V4 zP29`yXl_wuZ?Vp(U^%%Oenh}Jp001}`jp2{vHHoGeI&tr-s);ku^gsY7SrU%nh#R* zK`OMDvj1}O3+s9mkj*PLW@%xh?0?$+Uv{}q+5ai~KTU5d`#(h{uVSc&bv)4ri*=|l zGvZ1`PO%nE+5ai~KR*n|wEqr8PucsauJtMVKV|=Gt<2-OdSj^YLS6NRbvzZF$o|h^ z#$ww4Df>TV|7WPUCUkwW|1(UgY))lUDl_`4;<~O#%VSIC^OgOdsWJ@N|LHmw+5f2& z*%T|o)O;{CA55`6OtGk024adeZi;1YYCf3Lyw0lak^P^t|I<9#OV=a&Kb2N0tmCP6 zWT|pytA0tzK@~$kSz;AYrH&^mY%^iY{!g(0O=afiadD6eeU7V|T#%$%qND3Ox<1)| zr*`J2Y{sQHuA=$W5UJY7wN6XN+d7`E&-1}`k&2ReX0eg*#pq%+6ieNQg-7;qC?TsZ z?I+tV79z3yDLXqtEJz`is1VCjTER-JKC(AM1zSU@-VaNY61_v!TVGa7v1p3LW~kUp zECr9nN~~96x%#lyisi|y#m5>?W@WZ)lvrZ*{(o7*^?rR=$;2vl#k$tanr0R>u?{w| z21;!!m#kdHN2T(RxRX7fb7_&-z9F^m3m@(|W*d8(MFR#djX}Y`9fI>ywW^&-r+Y#cf`#|8Lsg zcG)#qGWE`|7ORU*T@rV_w zdvtYjy6W*$o~hqedzh+;oU|`%a(-S~-!HKyFU5P0STB!Q^{kP4#4>xtYIzjDJ*o>m zWa%l)`QfOd@l1|~yLt|r`wDY@J?dCFDt&IH& z>;Gx*kKXSi?aL>0PfYuM!~%Q7GI_LKKC1GU4t8PMPw8g&E&p+oT%a)Lm#z7FOVvir ze|>Acx257@<>0L*77KxHsdQ}lGh3<`Tl2-%dU#7EVJrW#^}TMfAaAj(*6ncolJ;zi zRdtJ1bqD?7E&H{#KHDl}-tvF9#Co?_|F>AhxBQXaL;v6P|C}$kspuklI#go^N8gzlb= zrfPB33oMD#TJqHK%c=IBdEE+{j?XF{EPwWBeLz;he{~5H%m3eMXWedHR{Ym;n^Hac z#`l%qS@KV(@;T~boKx{;skBV1j*^A%AM!QIkEQ(g@Onu3L~&wGwMl*QUqj>d$;W9u z)4e`azUY%L)cU1qJ^0CIDqqmj>s#k%wVqZyLQS=Kov*f_t@G20-yEyzs-8wYM7gz| zmx@|VzlvA5-XMR7e<*$=mUcHvrBqA4^&67RR{S#62>*>2Dc&X--VDd9tYZC93F7z; zD`%^oWvG5ZeYPpd_*DENm5=$`o5ey&Z`i^7?1l{uRr%M ziP^z*m~U8G@0L5+cz(`@g(uI?u9$L9ljmc7dtsBe@O}G`xlmQNaPoYY@7tmOQtq_K zXBf|1m-(r}eE%-`3B#=8+`vR=jX@W#Oq;)eqp|UH~Icu_V5(u`*&C4 z`Ig9a=noa<`*&HdR+v3-m-)=Xq2KvFUe@Cl#`{;xC(P%CNv74vvb`Jq;lgUR@aSfhVX`0)N&Us5=D zJx63sTu(l(BHl0KwS`G0)|p=`OmeJF6};V9zg)|2tpDC}oa9y{Q`(}**q!ykyR!vs zcQ#RMac1mSz)j5euankicdkcwXMA&au5Wi28CYk%L1Dgswfy7#lWAsmszU5e*5pSX z#`|SwbYb|fk9>^eWhD38gDPr!u-SEHXUCXLnH5t1#a`sm#1^-XZ4u7i4uWe0hB4BMQS`+fqp^%=a(2K7Zjyo{y@% zg+=y9Rg!X-gXd#>xA5Wpvwpg8@Os#pv(>LsnD1Y(-fSoHxrIpvsP)NKJwjo=f64lr zEtS>6B3q;i$gcGoe`oxxmJgg?lVqiDdC`eUcBu8}BZK7mNTOC)fR#Vmyf$NouT(49rVW~F!F-Phw&K56)`yQo< z&eBt7>B?BFCbFV=#gj6{I9O{Cv9Y*}c!?t7<&E`KPScQ035dSbf^!S{2xpk`CY-)j z{Asyh<+1hX_D#w+n)S6)Eta+muKuHLv$YUu8}u2OJHoA=Y=B zsqfYea?3j2wLvMSloId~WBw~zFwK<9s^iKQszLT`(1#o*2Mn<|E=x{7axa}lBDVj) z7)G|@w;^N9`uQI4PPw;RVsuEq#?^nR3th zzA?>8C*9sPoU(I``La~*8m@{N65m{-o2Ac%Jx>ru8}uO$ZaXQ--v*}{hi22)DZmY%Or+UHwYL8kGp35GuD;u5Vy>oo z;hH>5x2cTYYRjaaQhG1D|0()1$*IKEYlFVqrbdU=hQ3Pi)W1}!rq`z8-^b6Y?CDjy z^Ip|wUKIq)s|wQEpprcaSiGFAl5%%_NN|<5z4Jr0tHj|A^X7(15fux4wSI1r9FM1oynk>eEa=n$JK1Vf|$4yUXJBzI{n|a(vRvx|1;!N*Jwe}tL()V9sV;3h?qi2cDyFl ztCEYRWV$7}=9KsQAra!sV8$Vi2J6B_qmaxB)MP~-B#s9CgPgoy7OW=Y(@86q)TAuP zf9A`;)M0W-pDe7^S|ce=mt3lSD5QA%i?>e~{KH1Y!J&v=vRX}K56M`dT1S$yJjuei zWF98j_M5C?PI92uLXH2-mw_F^$pXEkAV9KRE4jk?@QLw$SsGZ7|IC+x>gc(7`&+lq zmw_?wWJ)nP7bU}Us<%nDBPXRB&meWSA?dHmtI$WR^kUix0&gUItZgO*Wv4Ph*m5e^M}H zve{!&fw>_6nS`Zm^qcfMC&gCFap6CcDw8#AlR;QjyeT?ll12vCR6Qs1jt~7KxlhTT zR^BSl%iCw0qN?~NrM3(5yuAI++b1E2>Ic;xrfS1UI^{{0_{r8_RW45Ik9-I``7)3J zZ*s9b=^{?Lhy{6791XgNlP;pV*(S-UCWSaAUBpQjaneN;PswE4$fS$-Az9_iU`AFB z4qe1a7je=>oQ&*DN=mC2Z_-7abP*>TXC_IqCtbuz7g3}rlRZz9EjyDg;-rf>+5b9| zE8b5wLFIqT5-%zTeDyxW3iMnB7q6l^YM)-mStkW+ML)3AF0-`OerYUWWu%lVQ9I#s 
zu5Bv{qxRb+`YUP6zKoClUs1p9l#%(_718>%J2)BS_6LQgKJ5_BQ79xo75a4EY%1sT zX|Gd)M|1*fDsDX89;kGw6*3g#SX#k<+cDyk9T0XZKMDYF)!uN`PJYzhp!R|Vd#4nebtx#Pf__HVLr9ni>d#$H}QVE@)1gkO{ILM z+V!UB0bCsLrCU_0LRvtzc;x?E53?!fAiA0_y`<>)n&BJn*t9Ic#`MzDzNnX)GT(#3#3iL|7>WV()s$KQ!`CrZFSL-XQgkI&(FX>TUg@Q1i zUt#zYh4K9kX{{ID!kE8DBYkUOyg!BEf0wr!!k9k_A6|bpUO#L8Mk9T}Q2RvT#p~sI zYBaXL6h6Fu`Y(kCub1>tqmj<9aPsaNwUH}1@ zswktg*kpA2%a47NkO;idmS{C}20j?VSSP>InPtPCui{D1lfg?T-LdO>5boO28Y zSPR2GjX}M-!m$5^Q~$qt{cH~|OnWdW6g&p~o>(|}KcvGdO#4;0%j-X# z*U$O?b))cl8DA?rdHqa`6ox-p!%O=WY^Qm3ZG#x`y~4C#!PxtIT*C0@3&VdHLHeM= zv|quXYvJJiuoCUP9=!fZ`xR`58o^X=VcM%;Ie)!wA-s6~Y|j|Mcw=GOE9t6^U~PBd z0b@W zL)2<3g*Weq{?!m&!7~s1fBIL2gZD%KYRJDH^3R{S;r-CR8cMGg*7cB1bV$v$F#V}1 zf2!7Z{D1jVqQf7`S`B5Q3e$dRJzkjp)RaFZy7(bFk}>69iJrZ%u1EgWWY;Gxbmi((F|7yv<5*^^1 z-uVCYuX;Ji)aoKAV{bdu~OcVWaQ|-Fip9^#SR#?Z|Iv&@DuEK4KKQ*lv zn@TLTzIa&2OC2wDJjQF9==QXpDXizAd_ogFUsF7+kuIQc)BcR-HqPG+i%VI5D;qw9HyeyP!4EUfVtonvE_Na4`_q3hB3E1uaX z(^t6bcp87vn-$ja7%y(hM>UCfH_`Pp#ZQ~)r<&-Ho9NV<yLh_Dk8m zOSh~b4>ftH$wMU%g&CnB4;4IA@KEp;F1d^^*Akbd7?(mOmx;s6CWp&qNE+ z#HGmhWrfFOC*~!c#ASN>L#e{dl=nR;cr`7V|Rjei>J}YzM73g@@uev&Q5yBzIX( zdYLd4olU_Y#GzQmdKpV7I0=K0huS<8hn-a~1$n5&Lvh$y<6Dr2N**eCC=NS|X)a@B zmrNy>Rm%#LUv^ervSwUHGzA z;y5$SqIRsyiuTJG*JVokGGcPchj5vXDab>?nqL;>U&dxGmv)!L8ka)Omvu^)DqUNw z;#;iag@=A0`n~ISvJx$lDLky-kA6S;ee3sWtpC^jaM`fM(*AHK?)ts!cd~}m3&P*& z=0B|8w|?LHed_nA-&4P*eh>XlmX{^gAF<*;%-_Wtw8+|Aci5v^ zq2I|mvc!6{sFJazDzFtMD17w0z8BV;@5QwK!}>k-d+PVl@1fs^ekY6AYA`Dg<9Det zJgnche&70i>G!4IQ#kbhq2GsoANsxNcd|T9atCawSZsxv3UB?s_50HAOTSP3p87rX zd+7I}--mwJ{fTvIQpILV1!PM#aEqmRPdY3Q>-VYOr+yFp9{PRg_o3fuPp4SbCRu>C z%FwpRMG7zdzV!Rl?^C~r@X-Gc{oeIE?dN2f@RrJDVcieQ+O||Pw^TL@PyIgid+PVp z??brj|GR#ty_{l6oMat-Sij4jr0faPQd?znTP6Mv>v!3gl>K0sb*s#7t99XF{?0y# zhxNPeFSdW@_bq>A%3qoCSEO27SoS4kKbS^(Sij4@gzTqS{U*bZg=Ig7?r+F`%3qoC zSCmF6Ec-HKUsz^cxat38FT3mu)3AlNewV%MvX@=G!4Ir+!zPNWIVEnsFn@knqZ5#Zjw)r7FfSqm4OUhRQ>i=s#aSfg1)aP|{VRzPfu} zy0|kQqm7QCGUz4R)<;icRvZgy^{&>O-Yva*&ETTy{Bpxh3uD@`w$&ifO!e2Ku=9Lz zbIq8ZMkM%8G+4)wjMB?f>KM)ZEJvmCcUwAnOZ!5Rm4ETX=%AJZzvBCU4HNF5P)YF} zmRMGh|9Ri0g;81fxh6R|D%3vPLe!!S`X`PT72ns}UlJt7T zwl=uQ;+Xa1h`(oV)x~d7QkI8pqLgW+@h|&|c!ajHd2_p5es{~cH0u{y(0i%GdRn4r znl15pleZ8|fsClN+q(D}fE#F(8 zdYU9DyG1>_j6u3@BCbC5aA#IXsk?k`8G{hP;`73FP?4HGKU@b58H11@d%4_kgu6UUowLu+C-7S2g zIDb7lG6r2Dl8?_o2X&uVSL49!7JBVcTAtlxUfs<`UE=4@Emu3wwfxalgIzl!6>C;# zZIaL|XIr1Ts^gDhtWZstV5Ide3>A%*C?CyFL|e>4HA^T zX+@}%WYP^cF^R)8ujnN6c7E!4#p(KVoZXp3V^AB^OLeRbb=IewN$T5Ys=w%{!8?yC zsM4f!Wwa%cb*w>tR8^8kWdOOaWwHlr;X&qAXMXz5< z`dM2Y)G0c+x1s5vO`2ItT{*SEZByGaRe@v`y=eXf#Z)b{s=jebN&V7 zNRmP4CwTq{?=RruNRqndCpi8H<2Nw>2RW0Q|7Y08V)#d*W_?U9s)tw_hIB%P)QyH%$HwG}dq`AxQ1q!VSG7Y~ zhmlK|ait+vh(R*I!VIDecE!CWcV160P%+e1q%P>fT&7!|LS5A_F2wVzOJ}GHXAB0T zhPq7BndRyHRk!%buq}rT9|>6M;k%Rc+C(i zSSJDP!;P3Zt1cF>PSMiBjCFOf)O3ox)#xy&+gYGrm_hJ<7)|M7rRk(KF3hM$=ge4` zVb)G>?u8+FJzPZ;=J(V$W3Zis)rI+eca~4}&eG}LSv&nO$Ahd6{bfvnDGJv+i;=q} zRl0uXLZf>uJ5%oCfp@E;;?&H z7h0FbMpsu@Cp+}Ryq>1-x#@dld2JWVOIcHO*M~YnxQX?(F}hM%U2|g5YFk~?(kf{Zg=|Y*nM+-;V$Et| zb#06DZ(HiZ7E4xPju(X%3ahJgs*852i+QRGkmZ%VrMkE!kJO~{)+o|hSY3&dH2Zd? 
zyX#zCoh)B`UEMq%S%h9!wz{I#H5%^Pb(gfdKqaKv#L6mGtM~cxdOlso>f-)%{i@5l zbN+2z-!nzcUl%RU-_^x0nW@(FeM=U!No1+9ec*MKUSU2jc9C9I&wctlF&STGI6uvq zY=!#tOrj%;x(b)l*Oyt~%M$y`^pqM7hYDaH9>OV19tzgx4e3fg?Ee?9N9z2k|4;qD zcs#_zHOYhUFnK%p1wI_YQ&{7*_5WM{zy1F@zj#98=>NqtLJw$&N&aD;pB~i1LpX(( zFvpt~?qU6|_b2uKkOzB+$Ms?Tzuq75Fp1IrVe!7C#)G^)Du{o)F#3P-%!v0%B>4~P z{7e78_5XT*QtuCW$Hx@U)Fe;&5YO&VLqTEj#86!KVgCQr`!n_aOuawiH7l(D@BaTK z%<*P}#lutB|1TaN**8|?KAxX0tn=ypnd0eT!^p$_f8F2I_=;y~iq~f@{r}eZZjJBO z|BDxC>iwB|f2Q6amNY-C|ChaCIrk6`^_Yu1V3Qu=<5B9uA*}x&`hUGIOYh6l`$FE= zhf{b7b9}|)HJ9eArTJ=UzFL~EmgXz=Vh=TBJ?#J2`St!R*_$PMvot@M$7*SQTIRvh z{IoPbiI;G4h4OF+`~St8rT1sczHHf-t@&zezS^3vw&ts?`AR%(D)$(w*%+#L7%FNQ zs!SLvFBmEr7)r+vrPzn^mulMK3A3pMjkKUi7`RZuCI-0MJ=BuOm>S(??ny-%LEAbxAbN&B3 z|JD0+2eH3xQ2L!MpFhuTHHfAB13ZLLv072jD&coEDt`qGE9AIe`PR+ zGdj-5C_^Nkw4n&BcmxO68!v#X3>_n|L%Knq zjNeoSlKyHRihvFA$h$qRhLhu?D&Qddt((vlDLV}y9y zg1oqenGp-=7=;?>LJ_whAxU9I@M18c8R97niNS|5xb=DRdeqMt$`FUDv?FNG+!hjS zC@;(mRdBstcxivW@1Y2AD5DjMCP?+X*NdeYx>~-cU9_3_a@aRn2M06GJZMKO)=EOFx2)S-L+IJbPjb^s3IfP z*C`G4!hGLTx^qbahr+aHDV?TF>CRB4yi16@Ff($gjC4-*K}eS>r@AqwnjqA&ooeHhHl)m0_l@WRZni#NPW zm$^%N`RO2Of3>Mx^`AAhc*t)rz3 z{~YQ-6Hl-RKe|MAUq>UyOT5d|&3U1AK6e9$b=TA}sEnrK_g#cBudS8i#d!4Tj8C4< z@AY)f&!;n9e>yAJjw&)vXGPfQd_JesKA%p)kE4o#)3`n@%!Jr&0Il{d`m zZS3?qjq&`NjbR@PpV?jnKC zWxRV|=C=xy5a!a~Ebm3vOKI@3h9uy*>}20ZgeZsp#y+e#+lNK{g-IZ!PR7C{kUGqF z?8E#|VP5|cf#V@>`#!92+((3f^+E9X)NR;rK!8Qiwz*#0oAJMbyxvVN&AnN{u{Y=M zz3H!S^)Jqo35WaE<0G^yMwyDg?atKinYQdTnkp97UuN^b+8KadLuZV!Jh5Hx?T~Gg$Tle zY(ZsCIG%%rHhYNBaxgx>2N|1pxobQ=^Oaj21%-M2gE}I6h`?@$5O+|AVyohP4_0t) zRg@Rz^*>=qVqSk|Gg)C0ly&OH@6P<@?#%b@&h=bj61a8ho)_lzck1SDkzwsl|9$tS z>leXV6Je`#4R>REU}0WwTfE*zo%+HY&qkf&EuF_L-I%Qky}}&N#tw$Ux_;{9F0uYE zm69!S*%wB9pu9U=|GHfNuFHCW!js2mg=W1yi^mVF@7Cpdr?9Sn==w==ur50$-t->C zy#7PN+rn7CuEX{DI^n^$3* zyozI`0(rGTr%k#7{|eb%YQ4xvMS|iJ8VjFx__RZ(9lmtwF4l&lvkc#yq#ds*SriXAeR?{+E66w~)2V*ZymWFk7I?^dOSwZZo?r%y||@F!BO zOBca&ZRkEr`(#XYmh`2k+MXh}PMc~>iuBE;FFnP0^%Qw^+VE+8x9NK+`>Axa^8d9# zrw!k2_+CQ1z)?@TX@gJeGTe6lLcjhvO^L!VUvq8Hy|nac{h*rfwlTc;HuzqePus+aaCNuk^SWk7_cHEM z6_?2m$JO1AYbt~*Mb+ByDHUQa?!|_5THni|THo8C)1IT#9?^SH5o5BWQswBXHrI?W zDdRP%`K3y46?V@h@*c9Puq7UZqI_|J=n!V*yGUW?4si}S`}WJF415+;*w*&>h1B2B^Kl2ICJi|oFO zLhOq=%!~Bfi=nE;Vg+e@FP1JWR{Dy@e6h}Vadow*Gq4y`U91USEO`)tdO_R*r^rP@ zp2ce6@&ozLyal!jE@nm+6Ge-Cd5dMnYx1gCRIoVlFNW!sB9d&uU(8=DlIt&amn|mu z7Hg>%TfG*!nHC$l7VE4QJ+;N?++q@Du}5k#CAXNNUo2QyT{*p#^64d$HJa(J5VwOs=Dg||pjnlf-o@G*ZVJtWCHQsjJ76I~&k-6@quDu*p0DCt^uW%+aq%vnoglNIcnT=Sc} z`g>6Ciy<~~W^aja4X1*c3Fbvg2uwO*l07AqRG|Pe+KvJ@8@)aDm;~i#i51Zi* z!!(EFd50eIQQ;fKkPbN~4ny6CKJa1B)?o?wVa3^D%;L}#6y&~(kfXt``_Rk1U9y+;4ECDFlUU&|#;!KD*;*_3vfmAXqJ z<`kNzn|H||br~wX?0)$OkuUBmd31@?b8uLvRS^5KEBhlQ&%q(9_hrYM#K-OtRS#v# zneV(THoffCz4X2=!w#2)OP6(wmjZB?{?%oi;j$Lsl4R|&V(3F_L|do4M{6QLlXyrI zp=DG1j)RF!0%}d|cTI$KDu;OlQ6jJsp;kA+l?dL{{?nwf(YgrT)IQKe=-22NJ%X(! 
ztlvdIruLvVM2M$Skv2p~rgnoiM4&Z9Ag3f|3lVmO20z|b> zHPTHy0!I->ijeMzAn=I5?})JSXuOmyXli$EEKlq0%AQKJw5eUIT@l<}5lX7vvPl)S zNrSAZJ-Vr#t4pl&O8_eRo-VqZE;^eo`iL$%t1fznAv%yD`i3F8k0CmsA^MFWdW|7^ zjUoDsA$pV{I-McPCwh?&-Ja-YzVvrL(c6iRPIN9Q`koX$50y3_ z9h~UUOb;h|q!fKtivB1?H}a)F6J3?)ho8j_vCEyZZjF=+Un1;gvnS z`W~;o$E)x0iq4Q#lVhZF@m zope)+>xI`XqW@3*Kk2|0gRVmf#i5L1;iLa2y_QmrwK|}^TZ7+UVO|djf*}RabbRcN;nQ>9EXyM5A*z@i(RBYc$oLYI8ouN|EE8)M8CR3uew;P z{Mda$nByUO)5Yjg;idoI`hR_YnlBf{U&j)iXJP*T9QuFS!^PUt$5zDanfe~5zDLp} zJ-%X9=_9%k7J_y8XMIZA zcwR|AU(s<$K3H{FwfbcP%u>8_iLk!>l_chPFV)d5(V56!D9m__+RFGkk>6fawB?=8%Do#u1WS!pxT(~@uJ%Sz7kx# z5;&7|P`{$%yy_>tY!oQW^=u>Ebz!cj8(Y8&b3NHQ*Jq6diiNpeY^2v5>Q5SNu*mp` zj^Z$WP|Hi!p&rsXJ@<+GZ`I3g(Lpd?yi`S3%TpHSt75H|rz}TS1Jcs>{FblWpY;Y` zx(t55uh5qJ^ZR`6A@`@e&@07N@vRDZS%tLpeLeaR?!Q>Ad^d~p>viP*s;a(QRmY_3 zf6HtBKdV{Za+_7We4lr{ucBjN1#P4MT9|ZTZHTT#Wu%RCC|iPYg)jF{6(MuJE=)S3 zHdS~$MQ6kLc`HA^B{*1E^f!zbmHXd0o`+SmjroVdq}yubE!kZ)*rIcM<<6{VX1C6( zf9C$8)9b81+@0~{S1@h*BJMA`ug>|Wu;`LVkNN16SVk>+!Ol9i-6Ha$X z>qq}2I;iCSqDvdB=X&&0tkovHYGKh)F`vK35WN=bkM;=BYl#kVgi5%BbbcZ_*+X=M zW4d0PbZ!z1e--1bJ{LXKAYE@^(S5Npy1u5y{YA$&t}50q?MbLNAXNSz%s0Kdbnef3 ziXEzf7u|bd(qRSjC535ELUfm+Uw?FQ+@EyKJ47EC%s zwp?z}k#A8A)$*Toi`n?TB+I!A(|)Ar?2>fAh57!ZItfzramjj(TD@~VO3GL4qyThb z+LN4ozjBG5aPj>~(L*NJV}<#CrRYbK0^;RuaX!jRdy@1|_iTKBW)uA+>pKh6o=oOf z_hdfv(NA)J(HBb2@=>ecc}UK0OFnN)uC6fOFUf}$79A$*D+-eyac;g}YHi-CH`tQH z*@|x$=KH01bz#m&%9j_WzoU5dqdVpP!*r>%CoAahEUqv2VsZWphjD+#j|9eq zX!D=sjWmhQFpUJu{N4u1(wu2s9SIMVC&5tDP%qeHo-k>Ks@sRdKaUba92wSUx>Ev5 zS0%^gvP|uwB;C{8(S=>9z_ccro7PQcVmh2Q#N*V`Qaqwsj3#lV+Q4v0xmO?R77_9G(deSF8eKy;l>UgJdikjL zMY~K35}6%oNRlE3C;7KQqDl4W#E#S@3lw46rqB>ZisdaN&Gl%TM5wgIeiPsZ`%{%|#71%vq-F*%MC^SC|Mt zLwdtcKS}IJb8AfJ`H-oJ7jIsPFs&gKp=Fk)6&gZOH$0vvn!dNxFx#B8tCp$*%Gt9z zQ#W&77x&^8M`(RjdJ4PB*YNNXe?J z)+l>gOUpd83U|E?l2dvaJ8obF)?=AvO0_o7FZ>6lT8T(L`rD=r%**9qA^&6DmC)Fb z(pe5mA4Ss;hO)~U?05bb$FO!ulOMB#;ccKBm|@ANIMT?9Yf&(h+h#K>^RSLrNVqIj zDp4yhy4Hnh9BZ1BxKwVbXXVkDCP}e+PF9rviyS+(L5#{;v_B0oiFD1>m`XOgHYkyo zQ;fN_L7dC34NFl=J?$~t!gRS+iY2KwsNwMiY?!ekXU*Ti`wK|oq@Q5_3poA)#&2N$ z2G0K+Tz>)gAL028y#Fysrlg->{|y{}0pmXh^Ec2b>_5Ww8@T@hp5MUx3rGgNzk~fR z;P?%U-@yFO!TB4w{s{M9!1JGj_ZN_?e?P(g7jXOqj6cHs3poD*uD^i$H}L!h-v1aR zHQ`UN{|1h~fbpM$`5QR@2-jc0{TJ~32HyWVG#BBoVE+p^egoq-(1m1w0q1Yv`u_}j zXFFN%TED3rr7+_mo!pfVA7U<ruN; zXSv@Cq3C*)OHpiNC`4PBNg;`!NDZ#AGIdQUBjqlJL_3DsH%8a_yUwq6q``LP!lF}` zy7K5czX{ruyoll8gDGUsSS;y4%OkEG#iTmR_A(l|URI+;NdRSif zWS*3|R)SbTy`lDmK|cPM=~l8_SzCpPhUk!n+Qr6p!rIAaWw1(4dKqnE6%AaS|LXkL8WC2NQ1(jg z8PTlDp?pVEYENO*XiDpT(%wojsMH|3u1EQWPnxtV<@rfXf6~l4pK_O7dQ?N_A3Fa~ zdT=ODB{8!Y7LL*R$yNU{$4U!JQ%OPU!n~f)^@y%6EMya$Kg+!9e7+v#q0}xIsobC1 z2P1VoN)CnM)S-6na9x4gyJIpRqw>s$m8(@AdOD}9tTCFVHH~}&Zl8j zsio^#x}IenGiO$H{;l&Xu@-7StJgzmS7j=<&#&T%qw6^;Sdo5W9A)gu_Woq*^t-Od zx_?Jma~BbaelA?eVbwEbYvQURhvkGX$Eq}~u1D>jaix4IdeLmkIH}z}n=(Po+1QVy zBhx_5saRbPD{i8buH>0tVO($FM$C5Ncm}g=W`f|e&TY(q_Zu| z=W#>_bwnq1$gn=7b6c3t8TFet=@8Q5N}~Vj}xTJI`p>-^LZT6pB~Y} zNpklUX7l<*e|LtSN7wV{Yxmm(p*z{$hm%F0>yP{|O(5IPxP4t6D z^k`f3ebiNW^mW3f=M$ac65ZVr9jNGWzjT43cTCami7t4h=96APS4!E#dddQ{|mNoV=!N_cz!ep~qPvw`Mq$#uO0MFi*ZBWj4~@or%1ghIo`gb;g-Ji#L=&+l4uY=m?g*iVB(Jc?smyVI7A0DDZ9!YxG zAv*rSu8tu(hcQI|`y2*f%83l+&+2wae>CK8*MgGw$9jRnq{mkNX@vZ{XrzY?3KvZvhWv{lJ>m$qQy1p*4AObNbT;lclFu_3pBy1N>0m=hVfy1i+4rG%{Y!`QdRT>D zuM_FZgC)L0@$Fg-@_7dRiI-l##H6Q6`V%kxj^ii&iNbuI${( z(T}I(Hj@5CVa`9P^;wb*tv)}_KgoPdVLs2)`XEI|o%AP0(w`{I=bQ3J^YZzo{D~=l zV#=SGUH+-)$7Yv&O1GSgC4Et0bc$!W-+P~XWv$d^byMgGJ`UPSceb4kuc{)Ffw zzVaF$c?&+zDStu&_iuf{_1s6!LG;?qatD@Q5dFO5f7Z}?P5#7^Ke6ObEH)Lt^`X|A zqF?{4H$_iAx7WKozWj?N|6<9%SX=b;TkC827q!0s_v4>V`*K8obvom3r!(Gnl)pNi 
zaxw)jJF!hUH6Cv^C|1&>P8208Q>j6$uowG2$r<0uj3nPATCg=ZC z^8x;UGM-a-@&5z%uQ2?xBYS>izt80Q@3HIRekb!qNAf_Yh9`(I-=5Vlzn(?^{fOTE zh+gL?URIdj+gh-fM?^74@xmi{iX$=mvsgcS6dx^&`R#1hrxwQg=}3zEsP+D(Q~3X! zk6$x8#&?sB>4-kNyiY1}Z^>$JX=5L*&z_)n5^w(hkf!&LFn7{E*9XD>XFUF~A>)1z z{qu8}f2h?d?eig>+DmugdezB)L_b`3@&D;x73TYNSwHc@fw;fR=GZ4Ej{Cc;w|s2P z{D0OD6sB&p2mlIG(7D_l{y*vVp6WRNpY^7I3m+X0$b~)vD z?;u_K6G9)vlocOS!s%1$=d3TjgLJl6)HyF5&g*AO`Hd$1bmk9kFkfDnI^e`ms&ZjvG7PS&3k#(e+Slm{{2r^)zcVc6%gwy@7va+tT4i@4v({9NJ5 z|0iwRV@K!pGv0L<>E>Sg;Sg{BKkI$2=>G0veQaUa>nnMe$3%wfUsj{vQno*mr$3T| zX;=CyO&w5AK=5jVBz2mKKx+dDk7@(UJZl(_b`ix(EtD-3B&<4wVsqtkV2{F7(wT>a)y4ujS zi>lYB%ANXCdrfU%Lb^7D#zP8N^|}lV(2%BDdG$Jm=+Nfrp?l6ZL%c^r?Ws)wmEpns|Cc^B#2S$OObAn6$D zWtb`kdK)AjxK?N=?j>3V}K%v`cCQ=OFcnDiD- zF@?^L*5s6!Pi+|9MRJxiG!xFDnQ#u7oH=B2=CGz^m&uu3CTEIbt~O{UEYF_1q=;i{ zgJ!}!nh8_g>~Fw09x1uN+8~pYVl8}vpW20#;!h$}CdX`}Pehbsn(8a74blaDv~=1a zlau1UtB0@GG9{)~8)R~_>$ObjwLNAx?b7sGn)|Lc$mE1r0BQr|gI{dl#34ZJTQ=?$_$p&s+vAR82+*D|CUS{r18V(7Juu9?uS zsWa4jRU0%DhPu7V7RUw-^_SJ$K|Kf2>9f3GGdsFBwIWzMy*HH%4uyFRbzCSDwb>TE zMf%RB=Pk(!wm@%Ilt>TpaKbqXpDWwO;N@Yh@=>aX!V0G|kwQ+!!?K5RHJDXX(vLY8 zDli-h&PgBf&_z3>w?3(C7B7Pi;9=F4{Mkt2@nv6d(ouFtswb>MAc(Fo2Mv7U8 zdYKo$Z1cGED6Wzao0mb(S4C7VmxGrQ2P!wdtY)|@_PShqURwTgv~d}i=Vc$oy>xe) zXm^_E>RY1^sxsxmjFC0b=Csb$M-y#M6HQLjTBM2QrpYI6qPb~9*Tc4kE>*h1x*l=w zx1^)e6u)Xx^=_giXi}YTq9tfk=bv<0njEFJbbiqYw59P_x*jcL+t&4n2B2+Sk5s*t zhI?5yUXR-L8`}g6>v|OHRM~b{8+7k%do9fC=~R&^%*fyciuiUBxOb`4 zKOQGyI-GszdL#wXsVdjIByGBQ>wD;WLe~>qsdcqwnE1S>&M#V|K6U=7^NYslg>!)w zHtEU^bS>Ze()DazkJ>!CG8SEo;$3N@OTSo0cKjgb+Ww=Brc?C`H?rL)7%L4>`86_Fvjx<2y9I zLmCdlf&|ty(D=&UrBve!FJbN9_5WS}Z{-G!rvfk8HJ*}@N`*dC{z@tZ=r~TYOyObe zFYy5jJZOGNr9V<)QYjk4!uo&dXLLzUD4UYqvKGz%FPa(A`pf>L{Kafm4YBaZGS>b} z`)hn#>iQ<p6R>5AsfqG4AwULP7HYtIa0?Jt@&&6n2q2&KLqzYLW>yEv7X^ zBRNG|IHg{v^n6T>r)XhA6*JCqs7$f2#?vB(9~wl9`e{5Za+k+gG>jV0l=fO`J^%oK z|9?u+;#ta2iPK2Y9;Vd+!&v)EY*R%lA(5dFt#G7hH$Q@g8sC(_pVrr38cmJwhj#Pm z|3xdo_0YrXmw5kx&ky+i8T)$OCio*BKj8QQyG_wpZ2duV7wJHAt z9zS4>&&gk6jhARre#ZL;tnoS?k00>Wc-{U>Jbu6$KedJaj5U6D{ebr`vBvLueEy8t zcK2}q0goSW2H6kb2z{Y!j=Jsv;cuJKaa@fVL@VvS$VAMpGEYrOjYCDwR#kB6|@ zq`$cTfX6Sf#%tscc>aLb4|x9)dwhPtYU>xR*FWI#OPs&N^9Q_s!21WR_bWUeKj3UX z;Qj+1KVZFIqCxl>&mXYHEB7z4#xFfSKVZ?6eX+)Cj$dMppJ*Pwc>aJjUUUBvYrLk% z;|ILj4_M>1#xJqPYvm7k{t|2aS=%b^fK}GhS5q60@y69}Br`Sd=;Y{DVDgfH1}X=?r0>f3s`ysn8I!A0y+gY24O? zLzw4(ydJ=#3#YKo7djr}opBjoDLjSgA6|Ayz4X6u2rps!zjy2S%r_TKVV(c#{H#yT zM%mE9Q}__Gwd0j<2*dv@yo4_?{l&-PA)LaSnEAKDI{(t~S%99RPk1U$!aD!a@sIBB z=>D$GPa4Xz84o;?At7wy(&{cAma(xC0Gz7EzeolTk8vsgbx;sqm0P3{ zDomQTMVjMB^Tp$fhH0_)pfG!S7HLTfllHFMC0>tcp%!Hv9<3SAH$*!|8KGxB`8(~? 
zSxvNUqE(w}mYJ+iIg>J*Wxholw?q?1nxMj@8Jnz!FRX4P#>);6guSnhi+PruxCcv}bG5`B$F?l^98cWek25HL*lg2L?pL#T%JRjFXkD!Ac zi}d%8WNizRc5@uQ--Gq}XOITFFlh~+&Id5pL$w+wP3K^I{?Vd}HkGuUqH!-w`!Ga< zDOvCHc=dtFHBwl#v-D>Q^Zo8@fh~8O^PObqA4}pS=KI|#19~LW_?lUHJ)|)&%=u6K zBd3dIva5%+lSJvUJo5PLjeecDykD-zkNR~AbG~b&(LRmwu)=(w8|x=eBNxa~zf|F+ z-_>W#dbQIi<9!RV6Tf|O}+Tao+ezis7^q3L& zyLlDX;`z9K?nCv;|A+to4v+uC|EoCu5C1=n^Z)Sw(|G+4njs`yp^0qgwghx=k*kNb;tJ)eGKonQTm zUp#)mx}Hz}vc}8(%<4D0>WBPdJx}%XioQ0k`T<4Q7+3vkSN)e??0#!KKlKZL`>}OB z>X-duo!|Z2AFS(9Kj>Be-BrKiRX^@^zx}6Q+*s#VzvQ>S{0gg|`qO`IyoBQiJcNBc z9G|29;v@cmFV^*_ANi<1@u)xfsNee>Uytyo-+ujobv>W{d|i+G^~HxEje~!{>j$js zk-a{$&qwz7h;KoJ@c)4IJk<~X8_=kI^UAN~cMU-tRP9*Zxnu+DG(h7Z>B6JN#`>w09LkL>Z0{r&Jq ze6X%Z_E~&!A$fy1;!pVUQT&;o;xwP1c*|aY_$*|PB@1u%;}BoiKVY9-q3)#1ELyQ)&|bz|W!c%N~FDJJj$0#Xi6IG-Q9b{AH0%7hXSLonQX(7yEkjygqy+ zx*qw*U%nFK@dGv=impfYdCMM)zp?QA0qc31PvwJkJ+jwd{uN!1Wr@`<+tm*%+3DH4 z_h&y-SHEdjzi3y#Xjgw|S3hW1e`Z(zWmkVC`z8KChFSfNUHyt({fS-uh+X}KJ$*b~ zulo6ze}7oltA4qzez~rGxR0!~`q{er$GZB*y86Gm`oFsP8oKxxy7(8m_!PSM6uS5g zy7&z&i`~Uf(8WI>8T8rJ|KHW$-_^e_S^GKEPe0U8Khz&T)DJ(@?>_W>S>}7_`x^Sb zhQ6<%?`!D$8v4G5zOSM0Y3O?z`kscqr=jm@s2_Ofdm8$lhQ6nv?`i0J8v34wz9;e{ zKCJ82_cZi94Si3N^`1lD(@_8B(Dya;eGPqIL*LiX_cio=jqT&)xY6QS=Z^gV^Xr_lE#ne`d^oD`wD$uq3?PGmiZ$Io;z5R%{)Z35mH&-AYRT%SAF7hkY*MfNNYwsAuy#B>{`}%s| zuN21jSC0$p+rqs5P5(GI`JD>m`>L-oc)odgKHbk|JhkxU^E@S5&Dt=MZBkbwadS{9cWXeC9ll|axZrIZ&$jb9EUpJ@j=PAeoGoD%)_H-`B zU*^*NEb^61_2)jBUY?KqN3UI#=VSif`onWKtUn4PKQcGt!B0o|64SnJ@&^_^JRb#3 zYq)v6d(*yd=2M@pHP6TTpC`8+#I&!6{hd#tw-LjipNIW?ubr63C%;tT!{f7FX3M1y}IP{F7KxCe9W&+@g3CcKznt0KV{8uKTk$Eff-M% zw-@%RFzi*0Hty%OdoN6KTCR7tCR1tFzoAk z%E7?YLn{n_a1ZsXJ%Tl0#@Dv^zIRMK-@^F*x9Ur_>hX56A9PF4r9N-=QzgHT!W2wR z=7Y-|z`pM1kN~THttM3LFTT-T69mR*GXMSPZ@548s`jFPvKR9~dyx-xOY?j$>I3X0 zq1q+!*-gEjCuGb1F2)b{X1t`l0iJKOexYWs#{IAQ?HcBneaO$U5Bd4F)Ks?=QtRnT z0+)P*g?awl)o;jn{l1*f_g(6LB>&mI>My+4{h{hl9k36tUzPp0lA3Ljk8R7sys$s# z2}S~Up1)Jy>CqdhpPA?H4iLg1MwlS-(UXik^a^Zf8R;Qa|@flg6B{110)s6Pb~<= zufcwF#edIa|8HUTZ_g$E4*H{K(H@^Q$Um?`d^3y}l>0*dgGKv%)*;`(B7fA`kjtgU-92u@i|<^^N#qU3gdibK4BlP^k&PtQ_;6^J$BXKU+yLB<87?3EAtG0{&vRG zZfAb=cG~CL8DGDB!akO{=lN?ng!>t^&xK(h@8J5WtSv>T$Ao>X$CKQj^*?to-u2q) zH!;uu2w;K9&vV7ETJAaQj7tb%g z#=D0!u$j-kn|e)mlb^Dz73^hsmz(>uzopz2>UAF}k}JMj)(7_T9wyNb5EiV!s>g>^m^p-R+Kq)9!7ijaqjoQIlR zhMH=Iio}PCHiwEthl*4s${IsOx1pkEiO7aT&TEmY|5wpws7N%_G!SY+2o)hqBt1e! z)v2QGkBF8m!QkCJ8r@=zID!#kdK_O+T_npQfu%)0dmverWP@Q=yv})s*vT z%5)Q}zOSpM0+L2O5!@uUh%TFE{y~KI5=&IMNwm62l(}tP55K1}N5T^A?Gl;(h$vfh z*&@yoA?Kp&VP0R~x*po6@@bkduWv|%cu3UzBht<9D_NBKM8I`^i8v35J`ag(heV@8 zBGVs{b zQX=Fjk@qRl_^Iz}a=tIj>tj(*e^oNVI-f+iC6byFRaFqXF#V^(yq-1m{}Sz9`kq+C zP*@`MOCszN+0CWziKLVd^ZK^Frya&TpPFt`O+Be5qEypWs;MjY)%CH7IFG(BH5KNO zeLDKy)O49@^2{Uqq^8?cQ*WxNIMu|QYU)lkm8Y84lO)a$^Lj*DkRnY;iJ&KgVufkX zru>JgCa@`gWRi6IacU+p=Vyst6h6YdzNz0e01qfdSBlR=?A znS|>~8WraE{;Cpylm5(gZ}3DNOsPrkAcJny#jzuBN5Jv@erMv#uttt|qpwpjKhp!zusiRb3Ht zJuyk@++P(KG3}{>Y=xWbf$huj_b|7*Tk@?Tg~QU1jhKK4iC-^gEJ zk>2xv>>tUW>Gr?m|7~IUM>*{e%Ab@!lzM+CI{f@s`K#0Zviw{5qapt<#!i!|M@Y`B!Xu>?*4JhxL;Dsjm0Chu3FXe`mYsO5N><5!BmY5fsP@t&o4)hGU@_?Om4PxVY#@kPbErsAJd@t(YncPf6Wc;Y91 zs`!=Gw~>k;r{Z~`_bC(~iskq+uSe_q2*q1N@ps3+hvH+3PsC7scqraI^nMSKxH#V5 z^*(jQm%B(Vy5f^v@xZQlvf@ojs=j;*uji9*Q9QSaRHEs9QoJo1?d70|USZnHan)q? 
zAr+9w^M`anqUudeVognIO~-3G9-9_Ml2orSk0<}HFprm4$2&US(eaLsN72Z_Ivz<) zo~8xQzjeH=-qi7?jyFk~A(5GpYBEeUK`P!hOxJRa9qg>^iUd}P=0M0z5BWT>dLu#QKO z%c1yvVeT&y#1RyE86x2uB2gP6AuFuo9UbqmiC6iWArhS-(wV|K-q!J!j<<9?u0IOv zcuU7)ey*^NH+4J~wH9WRa;S+l)MTrCXJH*rP0yjGbIq@X*)$xXq+WVIA-2 zct^)0DO-6TI^HIU!Jt1}SjXEs-qP`wj<<9?#uE$ccysf9ruLuOf3gX`CM@o+Ci0Ow zo|^23n)*lRc%kEQeLRLpFNR1shDcDx(D8I$jY?|*R4KB>%^-GeF&iL5tLFDl$I{7rTc)X+I9UX7$cw5I~kzipR zZ|itV`!DU!^oi{zz? zgr+L9UYJHmAz;m3bV;$NhE2hiD_~C_hQp>;lblclxeAHWrdo0ND@+*O;3yY z>AjerF3hH=H4l%+Cf;uo5JdqCYkaoGN1|eDYkaug+Usb1j>hL`e2&J4O`&^Tjn9?H z6pOSAv*~9yj!!M`*p$@Ow8ZswVK%*Oy+2#;&({01SrlJbqFQ=?w%(tu_h+-nd2f>R z6y|tsy-!>3)1DeH`X5iTi`S#~Y3qI3OXIcpJ#M{ETkq4>`?R;lOYhIt`?K}_?5pu1 zY5Ws8!}&#uQJCX%Sl_#!Y9X-Rr=$1j=zTh@Pb#dYIK59t@6*xybY!oN?A6ixbo4$Q zy-$bp<7>jh`+xL49lcLS@6(a}Ix_Y0~r2RDecs$v!tM}>ZeY$#|uI$&9{knRe zuHL7s_vtG-$?x;({keL7uI$y7y}EjTuHK)k_va(3DSLJG{#?C3YFdAqiaeg|*VX%U z^*&v{Pqbg(rc%C7&%B|jtnqz%BBneZ?N?!mz%pJ}m`#sOBEV`AJB{zt6J_P`LL##g z9d5K=&ph+~_&ycZcukEL?N?!q*V1^YiLXg?xzT=|wlzL#nr#xzZnRgY9gWXNgjeGu z5%fm;RaoPr_o?fB>P@1`O`^qmpFSeQdY`&P;5+Tt>3W~EJ}S)d>O&&Ov|p#|ed>Cj zy56Tw`*pNlJR((iBF{XY-lsk_UbJVghm^?!o-EdMX{=BjfvEuftI4i0#&5ZyF?18SFJ1 zltLIRDjgJV7}SLrtS}vHkQmB`vD|d91%1#s46c<2^;sNwW#Q={`I@xq2dP*Fm4F6? zjE0uP>R^N8Q0h{V2^P8y_FW8?#TESb6Gg)dtQdJvr&<}| zvQM3G0Ztxya^J}#BI#`_) z9WQV&HyE5cf|0Qx`(?1sGuQ^AuEl~l5qm;Z;-HSjU;r<{!nT96vRcc_&|b5 z(qD?r7nqHt5~GrqRzch^7u~9nkPew-)uhTM(Kl(tNrFy_dJ^PQ>uEuri1wM{igEb@ zlbS?IlIl?nmS9PyQ!LjK5mh-$a>n_eg!{h@p8q%C{U3rP4gc?i{l5*4|2JU#Pr>|O zg7bd~uKzZ;|CiwTpMv*)2cGsn1^fRSaQx?B{J#P7e+th3CAj{V;QpV2=YI;`{~<`h zivK;>|EJ*ie*?yU7oaaa5B>}_aT9P|W)HF#tdo_-T3^%FmS8x$-LY|kp zP0TPSgPH1QNoxKjDQlAofJp(sWGQqqE~-Z2g1qM7i6||_qz8%hmBj{JL3I|Fr7}c_Mz!$xs+8bPDno zI6E_ABKC-sf|>-qsE;LPFsTV8`@WOOm!y=5NX66o;=$%c)b5-H^D%A-`WX<%$C(srw}vs-()zQ&lA7iTHJsv0S~v1$hgco!MBK{H*bzp_7nbo`wEa~~abLz9fQKYo(ykB|>VpX3i89iI_r z*vT38@4ir8hQHANu@6>|M-CnNSKqHbSe}T}G1K4Qulvv6Z~r6YW&G-MmXs^!?nzht zmmm8dzwY0C>^w5(?kTlddgihWEEPy9wd9o0EoXMt6%^!&BvNb0`{hJ0L0{|R(&k9b zRpO%5zRV@5TiP7fLL_SmMpsXLLQkmXVK=6gi#zDNeZEY zyg}L=)_x@0vkUS>v^gKh%b*Ebp%d|bX@ZudjcI~dwVP})PgbN9jI$*K%7xKI`L5qdkke5N5^MO1sEz*|cIV}>opp#S|N#@}xwPuM=8&c}^ zm+<@s?mxoy7jXUt=5Jv91suPD{g1Hy*CC~9e}d;(Z{Yk3n16)v7jXOq?0m~^&2?#?u+EIJP*_6jPn zlY94Zix7%=goh%p#(_cJR8iYUq%5#11$ihj@EjiM@KBwV1R#6$7*X1m#@O2}qA|7NrQW+U6C!qna(;Nt5|d8&du z)ZwA{dT&u*k=cgy1$&DZ3}Mo}`Fc}CV3Q+qvj=c*F>>+sX2fPQyt5hn+2m)~3>TJRvYO>PiO7?Yzahs}-j}>S ztu^H2>sPH|wJy{0U(0a)su+brVT!qNHNof0N{PwTs&up_GoF(v&dHqSWLL5zyCyS_ zlc~nZ+@j>a>MHo_)vp+5CiN!Mc9X>MlexCZY+6Acnb8mCyd-fpnbj(|2)S?a*W|C0 zN5;25ndO;G>rCcxCKET4G&hs!mC1a{q!9XK#$+x&BQ{n~CNd^77n7Nb$yCMMjP@|0 zFquJ^Oc_k(2qq)>1^Fw@7d5FSgY%Oi^n&^oPf$T}GUQB7B9q+OlMFeN)6?W!HaSZc z6@kz&K(hHjOktW@(Nuu&ep5{qMZqno&BtcLa(N4&HX}(CH zJ;@(DDep9?&QOrQ(tKT-uRD2U&Nw6&nv{u{WXYct(VNuJoMa-JREU`*Xq%VT?7}r& zRQD@52{#~SoPykU@YkVVd1TBui=u-C`D^l5ShGcfxkY`$Mfrk7_Vz{c!$mEe)i!_K z`jtmMJo4c zrbWr_)fazV`W3_8H;>Fm)(?L@`jtmMJTjvYtliqI-P&~I_7q7q7mqB*+Vt!85}`Qe zvQ6Tey~M`dc_?zLn(CW@BbHQcmQ-z)RBd{U1$ks~f-@RGf6@fe^d~pVqBhH-Ho12; zozKlmr%hjUFAr$>Um^-DTi~4!~RYgD51b3b51$twCB({uc535FOpx1^C^r!f08e} zNPaj@>nQ2N59gCG;*p(QQW{H1F6FJ1)l$w&*|bimY}z7J>WxQM+Kyk!#d2w1i2^u3 z$ct2NQH1gQaJU@aq=wsE9Y1n@LLOO41)Nhz%)3on&ku(y>wTqRzz>HCTAQT41$ku7 zDO{EB%dCgF^!edjDFrCQ%UsOmN`y{PK`cG!1ugnHi+;|cH?mlGyy%TA`WuVo!P4$o z3=A&L?2D1W#n4|tUNaZVoM9K|(6zC8TAb+?r?$nZZE^CF+R);RvpB^p#_JX*k;M?) 
zB9r!F4tLR9FN%Ik(`eCbF51aOJGp4h7MZjcSt1wh)grh5qE%TmBa3mff;=)~GK`EZ zvP3S1!WO^K#cyyi*i~N!kIca#3wJFhf)Iv+@vDMK$V1VnQr2%%)oe3DwHcw>B<0== zN$o~^#TeA49PMtLe;H*e$Rl$OZFDa?0}Gvl)9Ljwjq)f5=8ktNsxk@(4NjCXLHc3J@c|Qu`79^WQ zW{*w6jm;v2O%9FCs)Nm{zs-u!&ANiks@=^pgU#yb%~JWzUH}Wxv2tLuZDg}lV3S2* zv$1-!lX|mMV6$hrAWy{50UM1sTZuQDcE5pNMSA}no@Ic}0a!-lI9oX`>c}VKbq;m% z$c|6)M4YWYF-8Ry8P9R(k6{)E_ZXs~$#n?MK{iHR;#aXRg``yY#3MWCq`EIfnRsNa zjxjig<==;eK!+v1Cl~*D@}DV>m6(qX%c4$p{@VE~m6b?5bpr3kAuHFRvcn-c*a>PJ zan?AbSvzEGJJj1fq?J45zB`;24+;7Xx&96(+%qQsIr-1Ye-56PqE@5vS4n8`=`)RW zm>@fp|2=fK4ij^SzS+~xBIG~wWuQO#bin={HmjE5pBrC>#+QK(^PwR9Vea_QZl#KiWFs{CH&M~Gcu-Vo{N?hu2eu$5m){* zJ-1KQF3+pfj4NPeCsoC&9Kjz`6hF)r;VjzS1tef&$O|fHnyi?6pDWEpJ{R(Dxt|` zG0$e1P(l7P*X)i3an;5usm+qAvQIn_tFu`5Rn9eoYz#*;l)Wx%Y%Z&J81`RG85HDs z6=P(ymbsX+sf?-OWmrrnE#|zW8^4%(TTI(6mSQdzc`oKOKOyDD^D-Xm$Tn-n*j2HZ z?_SK6FDCRqv3gd}v3Piq$;9~u7Hq5~N#`ps=-(~U6AT7~*?jW6fD)sn58PnAKYks3 z?9Wt057StWq z$Q`3?5U=;8^}e*;myXS)nCxX{>M|8o5Qkz}a;(EOgrb|kz%VxvN$FUznmjrpZ9zcl7|EgpICP@9K3JQSvw%Np0qV8~^V{4y$jS^IGr zleskJm&W|km|up5F3SrqJ@d;Vz{{TW%NmT!1;S;W+hrx&rJDKWI_0u(;?kI38uLqI zep#(_FW+|76kS#gU9Q?{-3gzmFy8mVc;O#D#CYWkPaPjGePO)y59|Dg=kILlqQ^HW zTQP%eOq2(i43kXeekbkRWYlC*t$Ggjr7@^KDZe(EC!d3@lZ?~P!NysJI%X$V5NDf$ zXeMLl%4DU(Wb|UP7iV(9oSaT4*G99kU96F6g%a_T;fl%kWwNPjHnz7mwp}(7g3@hC zR^BB`gi@-rEG=jUY?{LNe0N1L>FXu1E%qbCCL)^;0?ciqdQ*L&8ZHKAFHQh>K*cS+X!K+5VUeDJ7dSleKlpjAgP_ zG#hFDxsXaOvywT#VHiN2$!6K2M;&f1bJy_8hgRdydK+1UI zng?G|7KsEaSc8F(=zK*xUs0BUM29ht4lg%YDI8n^1jF_lO6u*V%FW~+STz>)QU%>p&!T8U?@fWcF1#JIycgFgZURQ{sNBw9PEDq+b`g{e*w>b4(|UPT>m*Z z{{rS;!1xO|{sQ*@9Blt}IPYJ;^PhwJFW~ym!TA?3{{qHe!0{Kb|EJ&~rNFoiE5gE@ zIF!OJ%x~+C#^X`Iy)Zo*Wl#%q!nhO;8J7*^+n0tz37IvBSvthfmYj6&G3iUZL86s+dP|JT|zCSzoe(a?EQ4@8jxw$ak zr@fr$p#70dKuCZg#M@NnKwWzsEF%^2m+G3_?^{#<55;|A@Iis|B}l5L~}EzE3`inZdh z`ae|MRMv&|=dxm=Fq65B6eZa@rEMEaSzeBkIZGCi7iMWhQ!2etPq8pd3mOUP3p2ak z7{Sg)O1GDtRq~cq5RG+KHJvHv+?la0|BjhsWw85D8ai3+(UTP%oy-S?S;Ep;GMJsU z)rBdu-r1B=m`bOe<;OXca~@#qkTi0r`Xy94lafLweZ^DK=Ji^3-6Gx} zNoYyDA6@^E8Z~>2pYSuzC;p!&l*|$&Rb}j9kzy(S2CsLkAZDwYW>4uw%RY#ZH$^L( z+e*g_)~5IcQ>1x?`8*CQFAMYj4%hp6*ydQ6B_L|x$wQi)RR2q1UjL!$W??>`BYScr zsePo#e3*^N!@}Fbx<57$=B58wm~}Z<)ihV7I9K&UnqO0O?uA(ebtyBGD&Nja&h5f{ z9(U<}n5nJR8H?;L8Qt=7eNmW#4UIfrvvo@9G`2AmrnXNTDzyv>6*T%AwR~X>Xp;(2 zla6JRd`w%qeo{WqS|0LzTlaT$y)1j4om2~jc|Vt8^ehvvC1$%C1NbM)K?(gXOudZKeBjxbpd`ZH>Sq^6qOW2jY zs)eOfM*sf_q_Og!O+8OBjpv(IRFjsd1dUSKL8`8vD$Sm%C!a1xpNwZuSH-)eehx{I zv4!h}b9lax-Wb`B=0}fv^7=nQVbbK9OZT^R{YTd?O`v%t0Zj^;xf-v3HA$0E{-ZFT z&l=Jn3u}t;`WNG)g(Vy&UEak?$R`~Z#1a6L*4Em(zoYBFx_)W>tSdb#@(k6}qrFr! 
zVPVT3Nv5Z>E#r2{2QeOBnBT+YdZr#X#Q~MSDl82pvPsNyY449Thz`l03rh-%vb2RI z(Z%lBCp5+5Q=GIgJ8ur-e^X7t^GK-dkk_p+-)HqU7nW!ki+T%F_f9?8PoazFlWO3R zr))> zQ(W^?H1<<;`cq_HQ*yqAr8da;(v%p*+|~i4g2+?;OJa`4-Wm_7J4%12u*QQDLsK+a zb5Ds2PR@Uu>*Xnat0}h8xi$ZYBen42{b~N%X}v}Y>ut@4oB8p=R0|Z>)7*pe#~!-> zq5B`Yf95xyG$pU6>;Aj$zw7?H#4Nj%Fq;G%o5rJQJXn7*#nxT8X}-3IC+Ab~mp{!< z`WLrrzUO?pq|dmHR8iU0QdnxOj4w=a|2#QU-Vgh+pZY1!&jyvJohpCnBelx=u>|bW z{YwLPNkCWmyTTfesqsiDvZmx*6|Y#S@sOJHBG=4QrR8{s#v?Qyil;1c<~)U5-ruml z%lUPYx%>IUJbt%7&G~ha%lygt^88)?Z0Gzcg~2I#!or+i7sWB)^=Nq@0uo zc*_^9FQgeQtxzkAPRi~Sra$#2d}7}JWShdYs_j&~W^M^llR#@u#e1w(Y-MB1B2V%) z&&kHFmk!bTkMrwC?3xW)^P{?*lA%_-YmyNBNo(``ikH>&sP!)Aml;}L2IrT_2CMp+ z{(9jhyoHajj>q{XWN$(;oLvluyj7fyX3FE0vKW4&(is~j=yz3Tlcede#QUf zPX_A|3m@H|;&qaR59z{(;>(3~e@Ev(I=|wNlq-E$_jh%E`u8F2`Cz@m!;W``@Dkp_ ziibL0+I9Y}<0~F3z5BvCzwC+D}LPTimEi{{5reyOmIKXrdo_a}R!cucT<9?8}mU z(Rwr#Z!N6*k$qXRFQjIASob6QvSeSD>2ZCHuk- z`G!U$*=c#g7W>{$yVizZJQ|!@57& zmo59UWnZ@Z6ZShz)mwk_b$j`Ex*yq>E&H-%U$*>{E&oLMvDmUNTlQtkKiTq6w(QH6 zec7@v`}p{}KiQWp`=b1CZ22ebet%f^CwrrKWhfsINB+u@y*aWsNA~8(Ur~Iga2MA3 zWpCK8QQj2n&uZjbTy5(ASszv26on5v>)%#qezR7i)QRk5R4eZZ_My%e++Qr(Ov2~gB-bggaiwvA~LuhE^o?;hEy9#UD?LS#5zXS}6`fADzhXFi?%A7$>z zkzWwUQ$K2bJ>-cW3-Rh z=7fR*hy4sk`~^q-YEN+h$Ggl&9PyB!%k_PYCJFCbmsr5S`(Bp=dtdI-7r_`!71ocGcX|o$iwyCEMGS{G$CUjBt4zf!kIMu zv#GA1VS&adL}O5-sRw`|FIEty8W1muhp*a2l90^G>g^+Sdy<+oX*lz^Z-;F{>@uX$ zO7{?%hM65qY>?aPr(!!GqsO__oW|ZawXQI8kWAd_I!>*NQz$)KICx!Bq6&CNuH6^ zv3tFufb8V!YNSs*Rmwx(u=@2B?suBA`K9HO04sT1SfZ2i2*S>lj>$55BR36XReV?O>Twy@Q(& zd`RrkAEe+Jq^%t+a2o9Q9c)(|TpszDH|CS}-Rbk#A;*DY$U{FMPuxN5{?j9^jE{Kby zbjP65$Dk=tx7i?_`QTb-&^>=zru`kcHe37T#&cW z`76vwXX2%^?yfTp)ag*ENwSj%u`>nNnX&5(h<4@=JJX+?+0stmtTUI}>8f>Rt~+(0 zJM-q9Y4^^oeWzd7S<}#2649yK*V$0gSx=(yNT=FHXR%DDizsR_F;S@TwX?3NGmOw# z72H`A)>%W=8S?I|XzX-8JL~ld@)lq=d-9Iho7q{)-&rZy6FyRB+)%V01#!*PpYE*r zR||5d(_0V&OCPwC9u2A!Sroq?iG=cLm)DacXoj$*Erk%cuBAPYNU&18BRna-= zTrYI?0(Q1ubowftzDn=GtD;q5maNlP>GV}Zq0^Z`>5X=hzDjQrHiNU>8?6etTI#g? 
zl~#p`=gz*Y-e^^r74P&_I;D;~DdH~0BB}UCF*yp)QJ_*PgZK@`g$*kV*D`s*(l!>t zv22bL5KHhFkmlUPVnCJ(vapb`3KlK0#Bnj<%5*G~wJc>|p~K*Q7&DegV3ub&K8sIT zs>(uEmcgoMmL(`GSYdgKnm5_1s7B6zM=TOl%aq{bZ}HM#sdVjNZN7Hn4F zek2wUjQ|iS14M`b#9|&%L;O2pAX##Lq}2{$W9@; ze@Bw@!hb3rhX@P|p z5L3ZYAVxnGq6A`~kReCz$9NpWaSZ!0V#xgzPQxg%-S&mj|9|bhTW?)Qx}JNlUqK)j z?#>RSQRf;00>trh);6~7)w^x44Xhv_Cdo;gX^K=x%5FFIk53h;1D|I~mMqDASYZcA z{E1^$)u>U2@A$s*w9_RzwiD-^@GirS@Y>q^CX%{)GwkK1*6I>qskv=%tX(V$wlRPQRQG!RST{XX} z=9l(w%q$-tGS8=>kmz~N?@(WO))BsF>;^_rGU+FVtdo-=)*ev9Up z<+ej6y@~jr$IZs`G}EUrWX&(j^P-5WVUrM4 znc@QZQ?~zPWn6VGr{@AiMI%u_EuCz+K_pl32q@O zU2@2p-=g`Yk0BO%E{0t7eADwhPYcVtW1)9!$h^NMJ`Cli=Xt*s($2@yG~Z40Ekdm# z?ztl7#nO46mah5knr}u+hs^t}5Ke2zJYNgpf`-idt*p}VSI&1`m=ZFkU$zAAw@UcV zA#;8!=^J7t+|rPFpH(F^RMK4xx$F5=mJjcAnReOy-nMR|g5kh^4e3+x6 z>!3xJC($J!@U*7dD%#$SLBbZH88n*8jTUgDnb-&d-hykWSaQV*78W23dxgJE{4EE` zKwn!}Yr@!_m~~Vcl?gtofU2E?BtB~GjDt~Ktpv*&B5N@@NM=1(7PeO= z=~R~8S61;?rr}g#BIpdNO%rOnB4>Ph!EEPfDRfP}EGol=S|ZRq zaeN3uuQI$CUc9JQ1&pVE-yz%mV$Z%q0;7;82nHFb7EoU}ZU;*w<7H zRaJY=>JX`VmD%N$$*Mz7q{u34rlVG3?n?))OgOD9Ag*-yROWOHkq;+|OaNE}g`nU) zD`|vVEf_!s!CnW?;W_kX=Z|jF}7PSF{V=sK?A8 z;HU)oAw&@YmNenZGOpi*KFks&Vc|4GyE0utK(E5l#hGU0(it+7+7>#aW+}3MU-WzC*DXrjTNtRCRKa{cI;$+}_ex+>hT=kJ z)sUH?x+L|_;O&r^h`6xUf2PFo{ge7<;C;x<`CaIYo-*f;&aNr*{b^henK_tCR)5rA zg1C#qLN3uEGu?S9>YrGcLoWI~bAFei?8Kt}tNN$&Y^E>reTe-xsiisIs{V;JH)Xz0 zWxnr1Y_pkM86@-f3|0-9#R3bRZ&TLqyMC|KPjWR3S^f2_=atb(?&={cd9*v#l)1zU zFWV_|{A=d?(|I>!^-pY+Anx!WZmSTekxW^6v{Aoj-uPT#l9{EuxPN62(|!*jC^MPX z|B%%`b;KbvNjk7tYBuk*Va|sF_JP5-Av0%J-0DN-`5owwnXkk7NW3osE8l`s(F6TT z!CCKtUa27~ADhm{-~{}T)xUE6Lsr^#R+2Uo)P}77i9Hou;SjQt&9hR|i~6VYa>%?t zT*wiMGtkvv;e8Q^6+83UIe%2dhOGXG6*|-7IerEwhRpLjH1$tM#DqHH`?L6S$h<#7 zQ~xX_4K8~M#Lf$@uyQ?C=)6BdS5Ci*y+T*{fa=Ozg%&zfXac#S=5|&o1JCoYs(;nq z4P%z?qq@Gp;N>_T*necOB&fu2$XUPVeG*kq5Em7UkQ+C|!22Tx^{1+d$Yg{eSN)#% zM`ZAG7LMrmOk#@kArE<{4u$U@vwqL}Ek^ZE-}sPKeZ$1QA@hEVN&Pb=GN{Bks33aE zoKKZE#mxI7GWB)Hyx*cKtzuUHD(Z?%R1T3IvLUP9sHlHpGlr`pSl?k)TyH&z2(H|1mLgG?L zTnUL6Ya&N=%F5d%V>&Zt#*q12=0X<^g9}1j2BIEFg;)tRK7?bhyf&e*3p8Dr=ZO>) zi3Ozs+*ZPy;H`yDEimaF_ygu06WLh6#5*_(9EL#|Iy+(#I?a$o2G+!k9VOJI&MnFG zoz$6`POQ$ZcEN*jARWtEB)A-0l$}pyk z@o3mD0~f;zK+C)}GcV1|_(@Lcm+aakx6KUWWYx78yN3unET7J*6;hEAvz(N4m|1_7 z$;gsPppxkp%|&%V#Drotq9KzQ;@iPjVm%OfA2VG!J3vlEG9re4CQM9b{4&c2-K{Ig>ErPJDlR8IA_hXGAzwU%xdZqRB*_qg1N`}urTZhgic$c z&=q4AWRaZFuWgECBKR6gFA0BLaOq+vFd5Q95gjEldkC1GWx1@4E6i+C;KH0)G%I!@ zB#NJ~AeS!fQWQ-JGGJwp5aw&hGEkU3A4DEM3`{z(1v@n5ELjGukq@$dFM~W}{l2Ju z8DPRch(Yhq1wUoAx5~hk0n2R0X<#psWf05Y-@%0tYy_)!rmXgg+G7A9%ApyVo;GET zUk0<$GH}BheukliokcpiPF&C~B|BK_CM>>HT*c0$mOSIV_U!L6RIO3RuM~ z2ag=GQ31;+2W?~;Oo&XHney8l5OOfsA<^Ujk;6s-vzX;TlEZf#+j2lGa#+a0Rx}^1 zg`W-$IYj7?l!If+>aXhea*!3hf6@Ch?P|sY;HNAh6!;fiyLzcsl;J!na zgH%9ZvB@Dza+)ctJ-t84V?yk5*va7@yBvsez{jrn=(_K^9IjpSp#bNQIsUHuO%D4g zoVX|leq7}sU3I^&djD1T`>OXB9^R1o{;S?!0o5q9y{G`dkky{<`&IL?YCeP$7zHDk zh}<(}B2y_qJFdqh*;rLkvf9h)kNQob17*ln?N#;H)LvJ6yzi0%zG=~X1fhu$b#lsV zPdHFX0S{F}4_Po_((e*IH$&!qpA=9|((jVsu~N|ctIj(qVJDG{dCF{$VC+-o_@&<^ z1*qj%pYde1CsZ|}3=Fxcy{7)U#;^I1z9;gMDYLz#`5+n3kg4A!>3fMt7DLv2B+Z9G z=8QoneJ?2?DJ9JZ6Otx*4n-#HGwqr z$Tm?|lL@gSQWG&cTX}X@I-G~dev?|}D=@K|uh-8QybK{tq8{S!hASyP(;8vzTSNa%-$bPxmJ2yMkuuA5D zN@wN})iU3Ro4s?h@6_(7vWKt0&F(@U7AG;au8O6vbF(w%H962IvSksbnNN-VihYfH zzl*C)wMyLk4BJ%_HtZBOaqp9q$eeULRZtu=H#_+uT{6|V*-18236Wfh^|4cK#m!ET z`bx{Dl2x-32SAT zW4APoEpxLIUAi)WvQySp)H3Iao82{V-1{UEsieuRWZ9_WwO5M%Vm_#!)PfMC^2dAU8X+o@*1qB=^4PDwzRTS#eV79;r># znB44DM4IB+O zkm)^2@}?%156$dkT5Bpt5OsVIb^4I$-4m5|5Vdqj@@^%inH zH_4242n#6)`)J6tzH`@jsU;6tfcf$+W~B#<`70E8*9tKKhRpXV@@AF5@s>dEZ;^LL 
zG3?1CfcGhiW)-4Lgrao*lJq`V<5RSxC|$m&yr>k7U$hXVigH2T`XX|RB4&&t?{!hR zO`*9pWO`GJ2u=#Qo`MQ)hb*QF;kDOAq!=nEsm$D(To!o0%IvI3eS!C>Du)wx%#g*x zk@P-FC$3rJRrIVDdWS`AJ6S&XKD_3FZv8N>Opc$$vXfJU~kpoIPaT_g&>nojd_U zrhd|^d6@J*D!=TCR(3iXCg~I3Ptn2N^*)M@^;H<>M>^ex*89LMmJ%jFSF6%+Pe7YMHi1FuTg_*jOw(N)Y^-f-*I|(^67Mz%5|#h zoxlv8X@cYZ8blm3ooT9_N_{7)T4#b?XFRm?Kbmg5vIwtb5nhQLCX4VNq!3Iz9O7N<;NJ-UB?w<63SUDGvn7hJP3bF36c?P* zj~As2@8Z*R{ayGUGT;{(2#Y8lmn^)YEIb>f-!H`kbjF7fJ_*5r)QGxj-okZ(tFggUTBM zap@-B3GoG#uC|L`!u1(s@B}%ag6i!<6uv_g*JBdiiPGzX99BE{Axd8layW-9JQ<~< z2c?Gx8OT8n%us|kLGa=b6mSSiuQnTPP`dOko`>*i6rgww9*6!e!f}y>L^{}~tmg^- ztaSA$SIKHWt9_D-MBbEBR{KfqC$%rq9tIGmtno*+CsK8)o>Nx)LG6o$y@-aL=Veqm z!IahhqV^Y!cTxL`+F#T@!R({Jx}(TuA|3csW_vU>r`#l~{jT=A+9%lmxIS0OYQL%d zruLiOx2gTA_N(5fs{N|*R<&Q%eo^~H?Gp?>X6?VM{YNb{s(fb3YES!(=iQVwe(g7b zzsIcoM*O8It9|V^k*^4UWXft^`!Q?(iA*Ar6fmk>Y07Fp=>4_-vi2YGsiv&S5f6zQFt#?j*``WSNqzJBp{r!+V5(g4!4-J|3oej8E}}g+S7hZB%6py z`;Yo=R5@G}UQSf`+?3URQTw_NMW&!~y%-r_nX=wT`!i`jChfnZ`%vT#k?G`9*7wtX zOxll0_hHihqrNp|wHMu<#;^UCwEvR!pUBu^(tVh;-^pQsq4 z$QXxQBnQcAU;8iWz7%O=6j5UoRbv!cV~n~VMII7G?h{4tsB+mUt9{*%A}fiaeN=hw zl-0iO%c%Vqwf~~-LlHzq(Lly1eN@DdDleY0+SmRQd8Ephr>ypMKN4kX%4%Qx(KM4$ z`%~nSQDkf}>V6dclzYf6wmJzV@TY9Ange6kTPEx-WzFW6*vK+K&p* zM3GCzp#2x5uLkW$ms2T0CM6|=y*;6g32MS$OY*NIk{QedHzl;ue=x2)_AmcP5`JS+ z0!3;nw^PDX>MBoDxn)|FFsJfJksl73_PF4bhD?7?mcK3w{va#Bo)uuv-0wk@A4gEfg}}1o0tEcv&T^@w_Rk60#PaH&qFURpqvo z;5(tJJhsYTSXEA1qY}^r?@=QY0IT3Is{DVIWP?NI`BMca@tOoRK2MsOA@lsHj3>^7 zHO?24_=9R1f@(T~YFdIy)`Cj*f=U)cb%K-1u^Y)OgBYsDY0C4b36Hu(C1?r1zeOdO zMJ3E7CR8@zmp0n(L*{wY7{0d#=(LFJNQb_~9 z?Sen)MG0MkFX{51cfpBtC5(1r!wtFWd7d}DGk(*%5(Zf$J7n&+&Z_7kbH6FZK4c|W ziO*vd+`y`YLj}u+T=jRJFRRMgSLr*e^qrOd!I^-jgf#BARq?=;WRCBWfW`zeC5Wyp zD4PjrdYgN>?)A}g#PZzC^qV*xN=UB8piu}<<{w|S6$D;Kj zviDdN-`R<$YQ5~nR~=7vyj0Kc#7EV?*2`}EQ_n{~@A#+Voo|b0>iM_CGqd6$yYWgt z@A#zt-u-vS8`a-Ve9`g4x5W$9zt&5Z|8qAU__p|;`rnD~>3OY}-T0pR--+KjUgzf> z|8u-ff8U9>y)C}x_?hEniU;h(&m7NsTRhCqtADMRo%oj4%Wk~O@h$bQ`(`&@<>%G^ zPP|IbYrX8opB!(}-?cv89#2w#JMkes?|6{b%Wizg@gv83M)_9Gdmp^+a zUgLPo+u|?ke<%K;=e1sT<16ZaC!XSXiJs4X-tiLkulr^<{_(bWhvO6a`%e7gZSe}n zBOHI=^FixnH~ygKcj603T9%+ zeqQ}+y~zHFLF*-Gy#%e7p!E`T-vsF=LF*@I{RFL_p!+9i{e;EOv;T$bC$T_yURRNF z((@eOFwa-YjJGHH4`+G?+oS(+URM}zs*Eot`a5U(1b;96HA#O;%4bT11D@9n;&(~; zB_hv>0+5ITn3(AbY>%}uGu?pg>H8=8(`I@Bzt^JmyedAvD!#mGJ$9WRUHiEyf2Jw_ zq$z*O_VK)%_e4|6UX?UX<^! 
z@caq{BAnskMw#(y@x#sVFy=QfJ~PqS*&o4#=JCz;2vsxb(b%5s-ynNb{DVVgzD-a* zU|^M24APf_^w*$#rl5S1z<59mj2Fz~neFMm3p!uJqWNAl-z?Cc$1lF$vdX`<%Kz5o zpCg}Y6d&tYPMq(BI8#IB{j$j4wGiMSF7#&)nfa=V?Ej_MUuSO@gn_w zk-oSne?|PxaS6;;K&GkMcZT6i;4B+_dYTRlIstJbP8VdnJP2^dGamrh;r2 zI165UrN3|byZL(o*sS7XUIkCTivM(_Jvw9+po>p%6iEXf zjNh#SyjTT@(O1Sp`zk;LVt>wpe#UFM;zfcN?~1>50W@{Ol}$hbwkJTDu6STC0$3;l zU=STZ&x#*q#h1hn-o=mG1rOYl02fp*(^YTNMdz^#Kw*Fi0X`@nrz>Bl30}O3zqAPe zsR_Qlb;Vl+*wUKfwE~Q2;`46evu&amXyVIlMSvm7=V=TCc27UKotY7C|)iA zh$euDruxAaG~YphDnWoNf$`bKn)D$HkVy4rReat;NDhLO4Z>I*GUJIm`3#NuR+Sh5 zQB;dVmi@%_5XG1{?HlUvjNecDh|hQG1Bs+vGaa7&6WVsB+sj^yw&!H;$zCEn|8#Fw z%CsLczvqyt&t};#S^89#J)C84XW8>v{){YtNtQn*%fFN5Kg#mQiREvWkFY=aC$s#s zS^i(;=S_Dw$0vVyRy-gpzK|8a$cm4MYA-6EZxKC)vmA`iuUapw)(iE48ScT~%fj_iSpPc9+dAc@=ewTg zdMWbv6#0{i{Aor0Ix)u0azXYl|7DTATjU=u@;?{(KZ^Y4Me%~7ctlaWqbQyt+O`>P z)bmZxbG;PB>y#fkv{So3hTosI}@KL{K1gBp6C5p7y1(_^QmXJUe7l@&;0pHePYP8?`nd_y=|I<`&+2nt3s=sZj4{projLOd)GS^E}J$+OC{!rKT zJlBiR$|gG+pQrsk`&oBS2B z|EmBHg_p9E56k?jnP1EHl&{$Z(A$ZRF!OooUr~N&7d_-oey{+AyZl)@{twyDJqkcl z0I6N|23>%;UG*kibX#43kX`i-0(qPH*#Zm|Agt)vn&{h_01lh-Plq`ze~bJ9T>!Jn zcWqty{h}XiqAP4o{*k770AlIy=F{)^Uz888eBdU!%O-rGmJL{|e0=32x2*Yhy+M-y zMgG;6H2=b**{yd_eTC{<#(B^Aj`|*<$p1rsV^e)i6J1?XeN_|S^!R+L7g0USO8=c0 zqlPu4`W6EycL7KXP+R&!5IxF}Ro_EYy&wjSP^$r?1@J6?V6CceQvP{mzUGkS|0=3) zBECwHJ~-=-*q-WJD)ZT9Jrmm#zE`EcBvjScRN*V_)>G+whyb86KV-;Rg}KPZ6vD!R!ce{zxku`r%K>)Tk**HvHFRZk}X+Cp}sSzpKYn(FyfPg1Jt2brHf zWC7Y&0s5<+MfUQnFXZow>i1MXQ?lv>RX=WyP;{2_i%=(N(`o z;;oRI>V;LWoyi$A>w{H4%y^Xii$hj@wD9=Ga$eir{0GbIBjYc%1adD6YOJ-oMxQujoG(5n>d# zr^_;+Y~y znKJzwif1sW$ig;oAJ2~Dg|DWp1T3IY}{K5;}^_TMeX{r}& ziZ3+PCpN_!n(85&>N8vA`O+%mn?sg=mFG{Z@|RZmi&al6Mv3X4<@lM87n|bWjTrqy z*7?)2{kiO4`68{!U#xuk)^xrozeM;DQTa~OKdk3wH;tz9_!gsUJ_dQ|9^7sc#RN=XGB=pPl$9L+1I? zl}|7}XOPVCDF3D_-=@=^7&7(4PJiH#n`E9ZU3g|)`SM-(YRVs+GRLoaimv?pu6*yV z_*ADo7Ui!WGWEkQ{KYOj$*y|Bo~Yk+)hl+zhr94YyXrNCXEJ2!H(mIvL*0|i^G5h5 zL#Dpdg}>W{C)`zEEWDHS^WwBQU2MKh2NOz zk4o~dCBzmF!u!;IUA12o-%hLc>#F^_8ef$AH7lN(7x~kLf0q@HQvUSB8x=ljSAVO1 z&;7bfur2)49sjZX;XD3j`Oibw@40`o2-&mhLo)08r+=L7D_*!m@Ez@+SG*w!A2o5m z<|G2_#QmC+;w>V;%)-;nA}G#D1m9Wyb>Y$N;I}GYAQOLO$RbQvyf=&RIx9Y`eDaA8 ztN4oWX@y5QWWJB%G0N}RB}C_apQ{Ma<$uqrZ_8EjtjhhH#m|(h2kfXc_G{CA zZ6cT#-dh$vs`A^Xtau#bp}A|ncI{WiOR@;1a~Ga(*M9BVuR8?s+P|yfeZq^(Ytiqu ze?^F!RgYH|#cvnx-?4lZPZj>|BK%#}cfN+OUih?&;>AG(^~!fCL4@wgk167BD?tSA z%6BO8zlynT$h4|Cs`o2poF_wueVoKcH_U;R?K6HpWaz&`uI!)w)oD(! 
zed6bi(;WWDX>Kf#$saV$5B5iYWD@_|kXMd}`N~OrZfT)?I^@LX>7Ppr>r;kY*dOh~ z3HQMFCqD8pkFfqzV7~B>VP8#i3jGJ>6HfDr?XzBF$Xrh$!hT4B^-(D>e=-H~ucSbJ znG~4+kpk--r+LTzn7=+`t}o#cqzRBrnd>Xi|BwRfb*H(=@i0GPnvd*{_!%kCzE6Q< zsYB-aimW$Jk@)39=K6~C{|$2$=kJiYzQlKsgm09@pOS>nm4p|TgkP4#XOzUZm4v^S zgzu4r|B-|bl7w%WBJcMU>5otHA5HU{<6-{QgmYnk^j{B|>npNeX2`hjQ(`{Zkh#7R z@2er>K1hl6(L=`lIb^P{#CY6v7jQh(Z--bp9>%MtJAwT(e?BGZlSAfuO3a_1?gzdv z@kWQt^)>8WzAy9LhMd?w;~7Kd`bvyX47v0D*M;L@KJ<{GuTA=lNSW&^6Q5wZOBTww z&r>Eo(2!xjr%ZmKB>QE^Tu+(t$g!Ij_Rsv1A#*)t*3S)@>nRhDG%25J$Xs8U`PpMR z@qW*&Z%CQ(sv&cIW!Ccynd>VvKOtq-uML^&Yp4^2GW6XfeR8^^5@p`+h4GNFJ-EIK z>usj{iv2NOJ!IbZh4}SDUO686*M`jXHq1G`KkdIE7mkPZT|=h6sCvgC^L{Ul_j{p# zXvkb&h52Da?tEXy?}p6#y%K+C$Xs7_;r(6%^+(kg4w?6RWxZ{x2DZv=Xi+EI%KY|O1{k`K8YkgiE-RU$y{H_-uY9`D4Y;lf(y{n18bC-xoiS?)M?ff6jcM z*Th@of0jKRh4&xD2N1<4F!UeAvlM^ow9jXNpECV9QGCIX>nlY0$75AIj`m#;e{2vR zYY={M5PohDo^23+VGzD+5Z-DK-e(a0=8)-+imG3T@+U;)gGbdT1oLmJUQ76KLHKYX z$UhXi__f6^9)x!jgm)8!cQXq}D4UcFY=Er zw4aB}{l6$4wx~XH5kL7tJoF(m9=yoky)YkQ$o&3`@Qj9e#rY8b)}s2#h4s-xW_)N- zJX-j0L&o~@()iYQUuI>lz}jIt3_}tR(^0%O+=FA)+#U8%a#x6O3ak}6(9odn zPQl6w=#>iMO8s#qoY+b0R0oUFdu*+8#J*=!bTnS0h zS<%p0VAGkO+^LRsdJ8(0*-l-z6WXH_Nvcz^>C|gF)tXLJl3wsZJ99WYDa|`GMmq_# zJN21Pb*59B=~QMqb9XycV2go z%1fi}qSVsH%1#v;H~L8$-6*OeZd76#ec+8MOrr+V2*B9LU)LC3Rkn9y@{DrJRr;$; z_Qt?>BW_Y%v7M#mS0+7G>MWHiOQps#43G>F`fIKDzLnX%Lzf8MfOOOJ!DfC1`qOPGn`uePzXVWdc$qpQzw#Dz(?jBe#-EtuocI(ou<~PSHjYCsL!u-$>xyh)2~(pCPoJ#^e_t z^$g`SW(u~%!<=?gquZ$^j+se-EeDD)K~$mAGGBrIt427tM(e&YSS0M!MpuuJI~s{o z89C>k^J1Q^@s4{c0ew!T3yipE;O#?n|J`7|bqs7|BNts_*Q z#vpWK4u~LM8Z!lFuMXH9WuNTaiYc*icIwMh4OXZCGp za!6-wS7&K(XC|$PUOT-3ong(+D78S#J8P~wLt=fUwa5gozGfQB95cODB9mR2yS37l zxnq^nIl#Eg%CxMNtQsppAy?K6tW42bi72zu;l9#8v<4`FE768mCdaJAvRMh0x3Uso zWg*hay45w|=v(QnFX&jqg3D#9ADN{Dnbt&R;z}lOsM26F6^u-h z)=W--%oMduhWAW@fy{gfrMC)=IkR{wvvx|Uthqt$$vnPt$Hk|#RYmOuN|VW%p%m4j zA>w@rIFgy~pVveY=VYd>k!fpWLI)I9MimB<3-kC3%Qy>BJOvU|czl%*DB@aFg6X(I z3q;&jLN6)-=3!wjSRqtlp{`P5p@>tokS4IOYDu8cg&2y3kU51JN`-_Jh53Yqc1mHk zePOZvFc7~I0D9jKjEKo0fp@{ukSbbi4p@`$FFcqi} z`CUNQWsMYZd=*xe7Zz0()`*vt9x7&_RQf0?^LH!hq$<(E1a4OYT~dSwtwe&VbXipL z=-30vtkKFMpGpg;vShN-p;4)CRN_%q>Kk@f(&VYsH)^6&k@`j@QA17i^-=w(WWlLa zKWe5siW2WGD%Fol_XaxjO60U=!7kM%$__*v^gWM`ICoV@_(Dko#9hKWv|D9`F5def{+6@uNpi zw%42E&GqKt{C|!wPk-E=AD>-F?<{963J*Pj3Flljo~v#ZBrB=f(vkN3X$ z?z7K6|K^+VhwT`{gZ;hH=bIYoGqY|qvbXoc&ri3PPdAsFCpT^F?adLN3~`KSdv)>b z@@V_``P z?CqUheE#9k6d4uq#T~N^=ejLNOxxDEo16o zE$t0sdhtPDwEXn);`rICsoCFo;AX;3pKQ;sPA|^y{^MwSd;b0P507WH(Vc7K)tg^l z{B(tXz8XvA{QC6hU{3fI8{2>PHFxvtN8fG^pPg+k|9J7!opwHC{T^MMUvI|xy}Vt# z?=tN78T8rqZqwJ7&yKF2U2e_}ZWrNkdh+nqK%Q>Sk2hBj|7o=O-{w+&@M=hB+oS7? 
z%a`BvJwIJ}&1XBhID7Ww{I~ydpWXDc`$~7tq=SP`zx?vx^DjUD>hrI^{pR@|def`k zj($ww7w5;@KmUsFH%5ND3~t8w#rgI2`|ah2eC0n}o<7-J{`~)JfBvOUJ^tZ+UVi>* zd54L(db&M2-Cq4R>-e*A9({jtc{&csxi~+hsZX!3FHaAjU2i|6k#C-T|NVAc7{~v7 zdGYM&IPiYExexC$_csmRPyI)p`uHyUfAjO-a_fJTDgE~J$uC|KA25aK9j5T;;@SE2 zZ?ky6JO9)9#ZTwo4&lSD(aZ0**B{iv<+JThD<5;B^BooNWaxiSo}FEv9z0jK-a!|f z6<O3tkQ-;o`}`)pk2H!sWr+8lJhhHx1uycBj$X_y5Bmzxe8lZ@>80&wo)1FO#jO zyj~0693Kz;cihJZ&(#Vl;bX#IECedydwWN(&bfmpn?E0H4zETF*V}{fIj>L8kFF1X z+?+kzvcZ=h;AnF-T01*^a{B56-#kBbp5+~Tyxd%$Ui@Nrd&|R@mb+U!-d>OOa`5`i zb9b<>JBj}5#l_inbB-rI_>Oq{k@vbM{@dBb=K2@@{_Xb9*Y7*do3{V;?q&J99>yzd z?;MHmap;`>uV>p=*U|?c>K}5TfA~rD5f0rC*J$6jg|Fr@_^KIin(d#D&Ym4_Uw>G< zG#y97$o;Uo4A6}0W<>1Nb`SSz#_RAjpbb9^6!B0P& zUT?3SZtTn%m(Uv;dv^8fR@1NY{;bRT@|ONk@6Tv!X&>(WDKlF;-FtuTy+3dNbobt$ zd+*Qtyy5@BE{J>Y&%O8O-uv^uZ}!``ZQku;{vA1Y*3e^pH&yQ!+vyWF_~ZI=YO6Kn zkL}BOKeO)S*t74knXPR|dHe%M^{ z`A`1;H$_}7o?SnEc71R&&G^Z|!S~Nkn}a{SAp-Vm)cD%gb?qNVjh}5TAFRgDY0vl3 zhkG^tojzTC&1bt;2Mfd9bKa5U!ug>4A^M7M?{v(_q_iFve zozMIZ^Z8mG^j#+QjY)s+HFLlHJ}n<)>(suiYkUt|C%whinemn93i8LWb^7BN9`+j7 z*xqF8oS&Q?Z_jTc7}wj&AIIEo&Yu48a%tR}Cd}wkMVI2K`d8O9fHWA;@ z!14BEGv@x_Z1erW#pUrEZZmdIjitayl%To7a7IeR_8F@ac=A z?Bv<`(bqSB`04WWb9sLAMdz7)a(404nD(1nc&w-6 zv0L`Ge}4MH^)Xs{cJ^WdUi{loJjZ;Gxq3JzYZ7C}JozWm%fEmB_xB>tod(lM-3?O$JY^6icMTP*tW3J39T zvFJzRPq%yjdGB|>JHI%*+WWtzDsb}f_~iEc|9!^hw(RGaO=Y*$BD^uhbrp5*V}t}_Wsd) zCckmU=5J`}{9&%Msd)Syq)vqo(93W1g*BG<(982L>g6}_g1WqqUcSHoVERmlJ@k0* z+t0uG_KUCo`N8}eSKI3w@QUNp>6v)Eci5MHOfUFTOtb z>ho_u{ijdA{q*4Z01oDW9=u6w8ZSMroEu|gd;a6;<;D4v?fFfT()RrOiQ_R=(T(?Z z?{M2oTGq0rc$_zDKHP3{Io{MTKE`H^^>DmNI6gdnKE^)I&XeO3_VW^t_se0(`;+uw zKOe<)6PF&!X0C$E?X8NsJ@|nBUSNB?Fn69ld$AtIR~gH6oKx3#HqZF`!PO6&@vFy^ z6y$})e7ybf^yt+l+kd`3=T12L-5-Cy_u0ke_IKZY_4~ckwm(1NcYB}x;k&*6vKJq= z<)8oWzx+*pHniNE#I|9&T+Q$H(w*}3ho7%r_m?NzCl{ALzj(*%i|bkDc>8Zd7wAh~ z#suF0#r|L}0!lw+yZ;~E~^JQ&A+j}N;a zjt<*?$;Ze0bM@{YA0IDAF_xUy?SmH(F>??vHpVbi+9ve%I4y_0oNU(|m$J?6X#Ag( z$9XKIF{(a6vQG}5FS<=Xnk0+s{!#A7`}riq{mp20zozx5 zoE+A}&1Up}IX?8ov#W!sd6C+VkXIB$c?0B!g&qC zL7g&56Bq2lDU)Q7JOq;yoam=`{X&ZC5~D<6c1jfBWupinMuOZA5sR8kc}c7kgp(g4 zpkYb)Fhl^k#V!#Tv_vo+A^`#c@Hs@F+Y;v7pu&dW#VU-#hnzAFmvIXQ$yhZjc}j*1 zOwbreMK@*imq?90W%OUkS21Ng-*Zlqu}aq!{k~v;d-A|e8hs;+~=6YF!W&FO7E@#S^??&p>DdYWDsUs`(kM*oQ$>={(F`qKWL+aNl<9)|(^$_lVL05^Ab43VlMp+SKzYa_1SWy?kV&vj^2{ld z5V+tJn=%Q;t88In?uTJ0M9KL3Fa|7(06JI6B)ASFEFUtcVaQKB#20^^o|#Pc^vrin z;%VQW9idQKb}X zMS-7r--K*P_#J!U*3T}jjc6DA7 zyi>80CTK@bU3p>;yS>X3NxO4A564U-fz^IA#IDJEj7v#7pNQ*jjc^nYcwn7cX$~#GE3dx;F z8Wu8b(U28~sARl}O)6r>?LSU8(wBF#_GUTqbIEdur-n1CFv3#E;l7eueU;y^kn}57 zuAQk2(~hG?HR+i_9#TjMGVbwIsnU+g*4IN2tMO&03JjV2GBb`NKTM{ds?shFWC&}G z>}su7-zFUHxQ=<^RUQvjcYh{V;rN=oflK84XKK)~l|a8^BFSPWz2U6;<8UMS;cpCI z>e%#j4&7(v?u!9z1(W);#8aT>rDS@(3TaJqpxTfltxD=r&>_Qo8^#?^ zzeL92x!&mcD5R`O^laA>ct0#rpL8kmrH^0mywQ0P6&r{dYb-9LL7GM&scw1a_ej+{ zFK8~4WH(dQ$|Rr5WO~m0Ho1}WuhUQ=S#_ZYxit*3@m8%=Oe=ToP%_|}Al+%>_0@S| zRc@G?WQPRVDIs!y$<0fztAYd#qs}RjZ83{+uC1gaT%!!Xg}iyWkT)=OcvXfBO?WKR zLd}CD&?(DFL#A5>@fllY#FrXdW&Cu=q_CsuRLI3NW}p6(L=)XydfItIGhpnvk|c{Z zNiFuF(?=R)qmIu*?+j0lu`LTV!N8co5PA2k^w~yQ)Z_2;232l@adhLZoiRw-8g=DK zPG+kye){~oVm4g|UCHXd;wnt?+0MW|DF#W3DVE|2rEq0beO%+DoPR#kS4S6@FY;+_ z(nc>|6~pYkkoUg&{L`-wKK-xX{L7c$ef#;r-4yHdi_0gQv(vxarrmz0U!0uSU%*GargCilZzBG^T1W?X*W~j_bVbtqpR1ByE!f@P-jVnB z{sdOGcU|eX|9g6Ib#S&h+@9^Uu{#`BJH2(P(mG6=ms7P{^)&zYn#!lQf7kUD=J)Qm zeZ%*C+h?8C3vU_g>$1?5vwz#q>mI!McCUwaxasdXyhGUe%Nvk6ZeH^jFdlA4@-YxP zZhHDHP&(fJ3LlKtanr?nB6i#kZwIyGc3dCuiPIZC@#|1L-abX||CNR2f!BS&4&GRq z@Jmy1`&PFn(Qo_JU(b^9va?^R!+6iPo_}6`#Vp)t48QFpd^4|^x4rspz5K}}kh$Ym 
z_<%_WzhV+z-HHDmXX2Hr{7Ypzw`2K*%PlYYC8X5zhu`ILdyQ`NcI~LvmmRsA*LLDE zp1%t3?QNy<8v9b;pZE58BPG>a#o-+*x1axPhI;>+=v;T4&#%_gJvP_hJvP@p#O}Y_ zI@5bhuHRA5HK*XWyTSaNz3bmhIid9QEY>zj9ne$O#>uk_sOoBse)f3I(T@X2wnZ{C^5 zn`DB&zQ(yOOJCLx*EmNzF?~E@t6kN zuerb1JnuEndpfLtR~qQ+lUnZyu|5tV)~{5{!@Gm-zonfV*N>@|-$K}zU+V~-KdWW= zsEG&vXe!S=o5jawv-p*pDweq9fBWRY-rnBc)%E83+129*dwY9(dwW;co9pf4y>Gtz z?6c3m`R2ji-qrQy^7`@KCvl0({u1_M_%_tXxj&BK;Tm)9{og)$u(x-0y}5pN_4vWw z-rnBT_2zo}c<-C>> Skipping '$pipeline_run_file'." - continue - else - mkdir -p "results/$pipeline_run_name" - fi - - pipelines_path="pipelines/$primitive_name" - - if [[ ! -d "$pipelines_path" ]]; then - echo ">>> ERROR: Could not find pipelines for '$pipeline_run_file'." - overall_result="1" - continue - fi - - echo ">>> Running '$pipeline_run_file'." - python3 -m d3m --pipelines-path "$pipelines_path" \ - runtime \ - --datasets /data/datasets --volumes /data/static_files \ - fit-score --input-run "$pipeline_run_file" \ - --output "results/$pipeline_run_name/predictions.csv" \ - --scores "results/$pipeline_run_name/scores.csv" \ - --output-run "results/$pipeline_run_name/pipeline_runs.yaml" - result="$?" - - if [[ "$result" -eq 0 ]]; then - echo ">>> SUCCESS ($pipeline_run_file)" - else - echo ">>> ERROR ($pipeline_run_file)" - overall_result="1" - fi -done < <(find pipeline_runs -name '*.yml.gz' -or -name '*.yaml.gz') - -exit "$overall_result" diff --git a/common-primitives/run_tests.py b/common-primitives/run_tests.py deleted file mode 100755 index 16c264a..0000000 --- a/common-primitives/run_tests.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import unittest - -runner = unittest.TextTestRunner(verbosity=1) - -tests = unittest.TestLoader().discover('tests') - -if not runner.run(tests).wasSuccessful(): - sys.exit(1) diff --git a/common-primitives/setup.cfg b/common-primitives/setup.cfg deleted file mode 100644 index e218fc8..0000000 --- a/common-primitives/setup.cfg +++ /dev/null @@ -1,28 +0,0 @@ -[pycodestyle] -max-line-length = 200 - -[metadata] -description-file = README.md - -[mypy] -warn_redundant_casts = True -# TODO: Enable back once false positives are fixed. 
-# See: https://github.com/python/mypy/issues/4412
-#warn_unused_ignores = True
-warn_unused_configs = True
-disallow_untyped_defs = True
-
-# TODO: Remove once this is fixed: https://github.com/python/mypy/issues/4300
-[mypy-d3m.container.list]
-ignore_errors = True
-
-# TODO: Remove once this is fixed: https://github.com/python/mypy/issues/4300
-[mypy-d3m.metadata.hyperparams]
-ignore_errors = True
-
-# TODO: Remove once this is fixed: https://github.com/python/mypy/pull/4384#issuecomment-354033177
-[mypy-d3m.primitive_interfaces.distance]
-ignore_errors = True
-
-[mypy-common_primitives.slacker.*]
-ignore_errors = True
diff --git a/common-primitives/setup.py b/common-primitives/setup.py
deleted file mode 100644
index c8d1e21..0000000
--- a/common-primitives/setup.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import os
-import sys
-from setuptools import setup, find_packages
-
-PACKAGE_NAME = 'common_primitives'
-MINIMUM_PYTHON_VERSION = 3, 6
-
-
-def check_python_version():
-    """Exit when the Python version is too low."""
-    if sys.version_info < MINIMUM_PYTHON_VERSION:
-        sys.exit("Python {}.{}+ is required.".format(*MINIMUM_PYTHON_VERSION))
-
-
-def read_package_variable(key):
-    """Read the value of a variable from the package without importing."""
-    module_path = os.path.join(PACKAGE_NAME, '__init__.py')
-    with open(module_path) as module:
-        for line in module:
-            parts = line.strip().split(' ')
-            if parts and parts[0] == key:
-                return parts[-1].strip("'")
-    raise KeyError("'{0}' not found in '{1}'".format(key, module_path))
-
-
-def read_readme():
-    with open(os.path.join(os.path.dirname(__file__), 'README.md'), encoding='utf8') as file:
-        return file.read()
-
-
-def read_entry_points():
-    with open('entry_points.ini') as entry_points:
-        return entry_points.read()
-
-
-check_python_version()
-version = read_package_variable('__version__')
-
-setup(
-    name=PACKAGE_NAME,
-    version=version,
-    description='D3M common primitives',
-    author=read_package_variable('__author__'),
-    packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
-    data_files=[('./', ['./entry_points.ini'])],
-    install_requires=[
-        'd3m',
-        'pandas',
-        'scikit-learn',
-        'numpy',
-        'lightgbm>=2.2.2,<=2.3.0',
-        'opencv-python-headless<=4.1.1.26,>=4.1',
-        'imageio>=2.3.0,<=2.6.0',
-        'pillow==6.2.1',
-        'xgboost>=0.81,<=0.90',
-    ],
-    entry_points=read_entry_points(),
-    url='https://gitlab.com/datadrivendiscovery/common-primitives',
-    long_description=read_readme(),
-    long_description_content_type='text/markdown',
-    license='Apache-2.0',
-    classifiers=[
-        'License :: OSI Approved :: Apache Software License',
-    ],
-)
diff --git a/common-primitives/sklearn-wrap/.gitignore b/common-primitives/sklearn-wrap/.gitignore
deleted file mode 100644
index 36fa0f3..0000000
--- a/common-primitives/sklearn-wrap/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-.pyc
-__pycache__
diff --git a/common-primitives/sklearn-wrap/requirements.txt b/common-primitives/sklearn-wrap/requirements.txt
deleted file mode 100644
index fccd5e8..0000000
--- a/common-primitives/sklearn-wrap/requirements.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-scikit-learn==0.22.0
-pytypes==1.0b5
-frozendict==1.2
-numpy>=1.15.4,<=1.18.1
-jsonschema==2.6.0
-requests>=2.19.1,<=2.22.0
-strict-rfc3339==0.7
-rfc3987==1.3.8
-webcolors>=1.8.1,<=1.10
-dateparser>=0.7.0,<=0.7.2
-python-dateutil==2.8.1
-pandas==0.25
-typing-inspect==0.5.0
-GitPython>=2.1.11,<=3.0.5
-jsonpath-ng==1.4.3
-custom-inherit>=2.2.0,<=2.2.2
-PyYAML>=5.1,<=5.3
-pycurl>=7.43.0.2,<=7.43.0.3
-pyarrow==0.15.1
-gputil>=1.3.0,<=1.4.0
-pyrsistent>=0.14.11,<=0.15.7
-scipy>=1.2.1,<=1.4.1
-openml==0.10.1
-lightgbm>=2.2.2,<=2.3.0
-opencv-python-headless<=4.1.1.26,>=4.1
-imageio>=2.3.0,<=2.6.0
-pillow==6.2.1
-xgboost>=0.81,<=0.90
-Jinja2==2.9.4
-simplejson==3.12.0
-gitdb2==2.0.6
diff --git a/common-primitives/sklearn-wrap/setup.py b/common-primitives/sklearn-wrap/setup.py
deleted file mode 100644
index 0090ec8..0000000
--- a/common-primitives/sklearn-wrap/setup.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import os
-from setuptools import setup, find_packages
-
-PACKAGE_NAME = 'sklearn_wrap'
-
-
-def read_package_variable(key):
-    """Read the value of a variable from the package without importing."""
-    module_path = os.path.join(PACKAGE_NAME, '__init__.py')
-    with open(module_path) as module:
-        for line in module:
-            parts = line.strip().split(' ')
-            if parts and parts[0] == key:
-                return parts[-1].strip("'")
-    assert False, "'{0}' not found in '{1}'".format(key, module_path)
-
-
-setup(
-    name=PACKAGE_NAME,
-    version=read_package_variable('__version__'),
-    description='Primitives created using the Sklearn auto wrapper',
-    author=read_package_variable('__author__'),
-    packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
-    install_requires=[
-        'd3m',
-        'Jinja2==2.9.4',
-        'simplejson==3.12.0',
-        'scikit-learn==0.22.0',
-    ],
-    url='https://gitlab.datadrivendiscovery.org/jpl/sklearn-wrapping',
-    entry_points = {
-        'd3m.primitives': [
-            'data_cleaning.string_imputer.SKlearn = sklearn_wrap.SKStringImputer:SKStringImputer',
-            'classification.gradient_boosting.SKlearn = sklearn_wrap.SKGradientBoostingClassifier:SKGradientBoostingClassifier',
-            'classification.quadratic_discriminant_analysis.SKlearn = sklearn_wrap.SKQuadraticDiscriminantAnalysis:SKQuadraticDiscriminantAnalysis',
-            'classification.decision_tree.SKlearn = sklearn_wrap.SKDecisionTreeClassifier:SKDecisionTreeClassifier',
-            'classification.sgd.SKlearn = sklearn_wrap.SKSGDClassifier:SKSGDClassifier',
-            'classification.nearest_centroid.SKlearn = sklearn_wrap.SKNearestCentroid:SKNearestCentroid',
-            'classification.mlp.SKlearn = sklearn_wrap.SKMLPClassifier:SKMLPClassifier',
-            'classification.bagging.SKlearn = sklearn_wrap.SKBaggingClassifier:SKBaggingClassifier',
-            'classification.linear_svc.SKlearn = sklearn_wrap.SKLinearSVC:SKLinearSVC',
-            'classification.linear_discriminant_analysis.SKlearn = sklearn_wrap.SKLinearDiscriminantAnalysis:SKLinearDiscriminantAnalysis',
-            'classification.passive_aggressive.SKlearn = sklearn_wrap.SKPassiveAggressiveClassifier:SKPassiveAggressiveClassifier',
-            'classification.gaussian_naive_bayes.SKlearn = sklearn_wrap.SKGaussianNB:SKGaussianNB',
-            'classification.ada_boost.SKlearn = sklearn_wrap.SKAdaBoostClassifier:SKAdaBoostClassifier',
-            'classification.random_forest.SKlearn = sklearn_wrap.SKRandomForestClassifier:SKRandomForestClassifier',
-            'classification.svc.SKlearn = sklearn_wrap.SKSVC:SKSVC',
-            'classification.multinomial_naive_bayes.SKlearn = sklearn_wrap.SKMultinomialNB:SKMultinomialNB',
-            'classification.dummy.SKlearn = sklearn_wrap.SKDummyClassifier:SKDummyClassifier',
-            'classification.extra_trees.SKlearn = sklearn_wrap.SKExtraTreesClassifier:SKExtraTreesClassifier',
-            'classification.logistic_regression.SKlearn = sklearn_wrap.SKLogisticRegression:SKLogisticRegression',
-            'classification.bernoulli_naive_bayes.SKlearn = sklearn_wrap.SKBernoulliNB:SKBernoulliNB',
-            'classification.k_neighbors.SKlearn = sklearn_wrap.SKKNeighborsClassifier:SKKNeighborsClassifier',
-            'regression.ada_boost.SKlearn = sklearn_wrap.SKAdaBoostRegressor:SKAdaBoostRegressor',
-            'regression.k_neighbors.SKlearn = sklearn_wrap.SKKNeighborsRegressor:SKKNeighborsRegressor',
-            'regression.linear.SKlearn = sklearn_wrap.SKLinearRegression:SKLinearRegression',
-            'regression.bagging.SKlearn = sklearn_wrap.SKBaggingRegressor:SKBaggingRegressor',
-            'regression.lasso_cv.SKlearn = sklearn_wrap.SKLassoCV:SKLassoCV',
-            'regression.elastic_net.SKlearn = sklearn_wrap.SKElasticNet:SKElasticNet',
-            'regression.ard.SKlearn = sklearn_wrap.SKARDRegression:SKARDRegression',
-            'regression.svr.SKlearn = sklearn_wrap.SKSVR:SKSVR',
-            'regression.ridge.SKlearn = sklearn_wrap.SKRidge:SKRidge',
-            'regression.gaussian_process.SKlearn = sklearn_wrap.SKGaussianProcessRegressor:SKGaussianProcessRegressor',
-            'regression.mlp.SKlearn = sklearn_wrap.SKMLPRegressor:SKMLPRegressor',
-            'regression.dummy.SKlearn = sklearn_wrap.SKDummyRegressor:SKDummyRegressor',
-            'regression.sgd.SKlearn = sklearn_wrap.SKSGDRegressor:SKSGDRegressor',
-            'regression.lasso.SKlearn = sklearn_wrap.SKLasso:SKLasso',
-            'regression.lars.SKlearn = sklearn_wrap.SKLars:SKLars',
-            'regression.extra_trees.SKlearn = sklearn_wrap.SKExtraTreesRegressor:SKExtraTreesRegressor',
-            'regression.linear_svr.SKlearn = sklearn_wrap.SKLinearSVR:SKLinearSVR',
-            'regression.random_forest.SKlearn = sklearn_wrap.SKRandomForestRegressor:SKRandomForestRegressor',
-            'regression.gradient_boosting.SKlearn = sklearn_wrap.SKGradientBoostingRegressor:SKGradientBoostingRegressor',
-            'regression.passive_aggressive.SKlearn = sklearn_wrap.SKPassiveAggressiveRegressor:SKPassiveAggressiveRegressor',
-            'regression.kernel_ridge.SKlearn = sklearn_wrap.SKKernelRidge:SKKernelRidge',
-            'data_preprocessing.max_abs_scaler.SKlearn = sklearn_wrap.SKMaxAbsScaler:SKMaxAbsScaler',
-            'data_preprocessing.normalizer.SKlearn = sklearn_wrap.SKNormalizer:SKNormalizer',
-            'data_preprocessing.robust_scaler.SKlearn = sklearn_wrap.SKRobustScaler:SKRobustScaler',
-            'data_preprocessing.tfidf_vectorizer.SKlearn = sklearn_wrap.SKTfidfVectorizer:SKTfidfVectorizer',
-            'data_transformation.one_hot_encoder.SKlearn = sklearn_wrap.SKOneHotEncoder:SKOneHotEncoder',
-            'data_preprocessing.truncated_svd.SKlearn = sklearn_wrap.SKTruncatedSVD:SKTruncatedSVD',
-            'feature_selection.select_percentile.SKlearn = sklearn_wrap.SKSelectPercentile:SKSelectPercentile',
-            'feature_extraction.pca.SKlearn = sklearn_wrap.SKPCA:SKPCA',
-            'data_preprocessing.count_vectorizer.SKlearn = sklearn_wrap.SKCountVectorizer:SKCountVectorizer',
-            'data_transformation.ordinal_encoder.SKlearn = sklearn_wrap.SKOrdinalEncoder:SKOrdinalEncoder',
-            'data_preprocessing.binarizer.SKlearn = sklearn_wrap.SKBinarizer:SKBinarizer',
-            'data_cleaning.missing_indicator.SKlearn = sklearn_wrap.SKMissingIndicator:SKMissingIndicator',
-            'feature_selection.select_fwe.SKlearn = sklearn_wrap.SKSelectFwe:SKSelectFwe',
-            'data_preprocessing.rbf_sampler.SKlearn = sklearn_wrap.SKRBFSampler:SKRBFSampler',
-            'data_preprocessing.min_max_scaler.SKlearn = sklearn_wrap.SKMinMaxScaler:SKMinMaxScaler',
-            'data_preprocessing.random_trees_embedding.SKlearn = sklearn_wrap.SKRandomTreesEmbedding:SKRandomTreesEmbedding',
-            'data_transformation.gaussian_random_projection.SKlearn = sklearn_wrap.SKGaussianRandomProjection:SKGaussianRandomProjection',
-            'feature_extraction.kernel_pca.SKlearn = sklearn_wrap.SKKernelPCA:SKKernelPCA',
-            'data_preprocessing.polynomial_features.SKlearn =
sklearn_wrap.SKPolynomialFeatures:SKPolynomialFeatures', - 'data_preprocessing.feature_agglomeration.SKlearn = sklearn_wrap.SKFeatureAgglomeration:SKFeatureAgglomeration', - 'data_cleaning.imputer.SKlearn = sklearn_wrap.SKImputer:SKImputer', - 'data_preprocessing.standard_scaler.SKlearn = sklearn_wrap.SKStandardScaler:SKStandardScaler', - 'data_transformation.fast_ica.SKlearn = sklearn_wrap.SKFastICA:SKFastICA', - 'data_preprocessing.quantile_transformer.SKlearn = sklearn_wrap.SKQuantileTransformer:SKQuantileTransformer', - 'data_transformation.sparse_random_projection.SKlearn = sklearn_wrap.SKSparseRandomProjection:SKSparseRandomProjection', - 'data_preprocessing.nystroem.SKlearn = sklearn_wrap.SKNystroem:SKNystroem', - 'feature_selection.variance_threshold.SKlearn = sklearn_wrap.SKVarianceThreshold:SKVarianceThreshold', - 'feature_selection.generic_univariate_select.SKlearn = sklearn_wrap.SKGenericUnivariateSelect:SKGenericUnivariateSelect', - ], - }, -) diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKARDRegression.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKARDRegression.py deleted file mode 100644 index 6d1b782..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKARDRegression.py +++ /dev/null @@ -1,470 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.bayes import ARDRegression - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - alpha_: Optional[float] - lambda_: Optional[ndarray] - sigma_: Optional[ndarray] - scores_: Optional[Sequence[Any]] - intercept_: Optional[float] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_iter = hyperparams.Bounded[int]( - default=300, - lower=0, - upper=None, - description='Maximum number of iterations. Default is 300', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.001, - lower=0, - upper=None, - description='Stop the algorithm if w has converged. Default is 1.e-3.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - alpha_1 = hyperparams.Hyperparameter[float]( - default=1e-06, - description='Hyper-parameter : shape parameter for the Gamma distribution prior over the alpha parameter. 
Default is 1.e-6.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - alpha_2 = hyperparams.Hyperparameter[float]( - default=1e-06, - description='Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the alpha parameter. Default is 1.e-6.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - lambda_1 = hyperparams.Hyperparameter[float]( - default=1e-06, - description='Hyper-parameter : shape parameter for the Gamma distribution prior over the lambda parameter. Default is 1.e-6.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - lambda_2 = hyperparams.Hyperparameter[float]( - default=1e-06, - description='Hyper-parameter : inverse scale parameter (rate parameter) for the Gamma distribution prior over the lambda parameter. Default is 1.e-6.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - threshold_lambda = hyperparams.Hyperparameter[float]( - default=10000.0, - description='threshold for removing (pruning) weights with high precision from the computation. Default is 1.e+4.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered). Default is True.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - normalize = hyperparams.UniformBool( - default=False, - description='If True, the regressors X will be normalized before regression. This parameter is ignored when `fit_intercept` is set to False. When the regressors are normalized, note that this makes the hyperparameters learnt more robust and almost independent of the number of samples. The same property is not valid for standardized data. However, if you wish to standardize, please use `preprocessing.StandardScaler` before calling `fit` on an estimator with `normalize=False`. copy_X : boolean, optional, default True. If True, X will be copied; else, it may be overwritten.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. 
Applicable only if \"use_columns\" is not provided.",
-    )
-    exclude_outputs_columns = hyperparams.Set(
-        elements=hyperparams.Hyperparameter[int](-1),
-        default=(),
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.",
-    )
-    return_result = hyperparams.Enumeration(
-        values=['append', 'replace', 'new'],
-        default='new',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
-    )
-    use_semantic_types = hyperparams.UniformBool(
-        default=False,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
-    )
-    add_index_columns = hyperparams.UniformBool(
-        default=False,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
-    )
-    error_on_no_input = hyperparams.UniformBool(
-        default=True,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
-    )
-
-    return_semantic_type = hyperparams.Enumeration[str](
-        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
-        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
-        description='Decides what semantic type to attach to generated output',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
-    )
-
-class SKARDRegression(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
-    """
-    Primitive wrapping for sklearn ARDRegression
-    `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ARDRegression.html>`_
-
-    """
-
-    __author__ = "JPL MARVIN"
-    metadata = metadata_base.PrimitiveMetadata({
-        "algorithm_types": [metadata_base.PrimitiveAlgorithmType.BAYESIAN_LINEAR_REGRESSION, ],
-        "name": "sklearn.linear_model.bayes.ARDRegression",
-        "primitive_family": metadata_base.PrimitiveFamily.REGRESSION,
-        "python_path": "d3m.primitives.regression.ard.SKlearn",
-        "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ARDRegression.html']},
-        "version": "2019.11.13",
-        "id": "966dd2c4-d439-3ad6-b49f-17706595606c",
-        "hyperparams_to_tune": ['n_iter'],
-        'installation': [
-            {'type': metadata_base.PrimitiveInstallationType.PIP,
-             'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format(
-                 git_commit=utils.current_git_commit(os.path.dirname(__file__)),
-             ),
-            }]
-    })
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams,
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None,
-                 _copy_X: bool = True,
- _verbose: bool = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = ARDRegression( - n_iter=self.hyperparams['n_iter'], - tol=self.hyperparams['tol'], - alpha_1=self.hyperparams['alpha_1'], - alpha_2=self.hyperparams['alpha_2'], - lambda_1=self.hyperparams['lambda_1'], - lambda_2=self.hyperparams['lambda_2'], - threshold_lambda=self.hyperparams['threshold_lambda'], - fit_intercept=self.hyperparams['fit_intercept'], - normalize=self.hyperparams['normalize'], - copy_X=_copy_X, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) 
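# Editorial usage sketch, not part of the original patch: the call order the
# methods above define. train_X, train_y and test_X are hypothetical d3m
# DataFrames; metadata.get_hyperparams() is assumed from the d3m API.
hyperparams_class = SKARDRegression.metadata.get_hyperparams()
primitive = SKARDRegression(hyperparams=hyperparams_class.defaults())
primitive.set_training_data(inputs=train_X, outputs=train_y)
primitive.fit()                                        # trains the wrapped ARDRegression
predictions = primitive.produce(inputs=test_X).value   # CallResult.value holds the DataFrame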
- - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - alpha_=None, - lambda_=None, - sigma_=None, - scores_=None, - intercept_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - alpha_=getattr(self._clf, 'alpha_', None), - lambda_=getattr(self._clf, 'lambda_', None), - sigma_=getattr(self._clf, 'sigma_', None), - scores_=getattr(self._clf, 'scores_', None), - intercept_=getattr(self._clf, 'intercept_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.alpha_ = params['alpha_'] - self._clf.lambda_ = params['lambda_'] - self._clf.sigma_ = params['sigma_'] - self._clf.scores_ = params['scores_'] - self._clf.intercept_ = params['intercept_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['alpha_'] is not None: - self._fitted = True - if params['lambda_'] is not None: - self._fitted = True - if params['sigma_'] is not None: - self._fitted = True - if params['scores_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: 
Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - 
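# Editorial note, not part of the original patch: _add_target_columns_metadata (this
# method) is the metadata-free counterpart of _get_target_columns_metadata above.
# When the outputs carry no usable column metadata, it tags every column as a
# PredictedTarget and falls back to synthetic names ("output_0", "output_1", ...),
# so downstream pipeline steps always receive named, typed prediction columns.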
semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKARDRegression.__doc__ = ARDRegression.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKAdaBoostClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKAdaBoostClassifier.py deleted file mode 100644 index e48b2b6..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKAdaBoostClassifier.py +++ /dev/null @@ -1,498 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.ensemble.weight_boosting import AdaBoostClassifier - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - estimators_: Optional[Sequence[sklearn.base.BaseEstimator]] - classes_: Optional[ndarray] - n_classes_: Optional[int] - estimator_weights_: Optional[ndarray] - estimator_errors_: Optional[ndarray] - base_estimator_: Optional[object] - estimator_params: Optional[tuple] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - base_estimator = hyperparams.Constant( - default=None, - description='The base estimator from which the boosted ensemble is built. Support for sample weighting is required, as well as proper `classes_` and `n_classes_` attributes.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_estimators = hyperparams.Bounded[int]( - lower=1, - upper=None, - default=50, - description='The maximum number of estimators at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - learning_rate = hyperparams.Uniform( - lower=0.01, - upper=2, - default=0.1, - description='Learning rate shrinks the contribution of each classifier by ``learning_rate``. There is a trade-off between ``learning_rate`` and ``n_estimators``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - algorithm = hyperparams.Enumeration[str]( - values=['SAMME.R', 'SAMME'], - default='SAMME.R', - description='If \'SAMME.R\' then use the SAMME.R real boosting algorithm. 
``base_estimator`` must support calculation of class probabilities. If \'SAMME\' then use the SAMME discrete boosting algorithm. The SAMME.R algorithm typically converges faster than SAMME, achieving a lower test error with fewer boosting iterations.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.",
-    )
-
-    return_semantic_type = hyperparams.Enumeration[str](
-        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
-        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
-        description='Decides what semantic type to attach to generated output',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
-    )
-
-class SKAdaBoostClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams],
-                           ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]):
-    """
-    Primitive wrapping for sklearn AdaBoostClassifier
-    `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html>`_
-
-    """
-
-    __author__ = "JPL MARVIN"
-    metadata = metadata_base.PrimitiveMetadata({
-        "algorithm_types": [metadata_base.PrimitiveAlgorithmType.ADABOOST, ],
-        "name": "sklearn.ensemble.weight_boosting.AdaBoostClassifier",
-        "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION,
-        "python_path": "d3m.primitives.classification.ada_boost.SKlearn",
-        "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html']},
-        "version": "2019.11.13",
-        "id": "4210a6a6-14ab-4490-a7dc-460763e70e55",
-        "hyperparams_to_tune": ['learning_rate', 'n_estimators'],
-        'installation': [
-            {'type': metadata_base.PrimitiveInstallationType.PIP,
-             'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format(
-                 git_commit=utils.current_git_commit(os.path.dirname(__file__)),
-             ),
-            }]
-    })
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams,
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None) -> None:
-
-        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-
-        # False
-        self._clf = AdaBoostClassifier(
-            base_estimator=self.hyperparams['base_estimator'],
-            n_estimators=self.hyperparams['n_estimators'],
-            learning_rate=self.hyperparams['learning_rate'],
-            algorithm=self.hyperparams['algorithm'],
-            random_state=self.random_seed,
-        )
-
-        self._inputs = None
-        self._outputs = None
-        self._training_inputs = None
-        self._training_outputs = None
-        self._target_names = None
-        self._training_indices = None
-        self._target_column_indices = None
-        self._target_columns_metadata: List[OrderedDict] = None
-        self._input_column_names = None
-        self._fitted = False
-        self._new_training_data = False
-
-    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
-        self._inputs = inputs
-        self._outputs = outputs
-        self._fitted = False
-        self._new_training_data = True
-
-    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
-        if self._inputs is None or self._outputs is None:
-            raise ValueError("Missing training data.")
-
-        if not self._new_training_data:
-            return CallResult(None)
-        self._new_training_data = False
-
-        self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
-        self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams)
-        self._input_column_names = self._training_inputs.columns
-
-        if len(self._training_indices) > 0 and len(self._target_column_indices) >
0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - estimators_=None, - classes_=None, - n_classes_=None, - estimator_weights_=None, - estimator_errors_=None, - base_estimator_=None, - estimator_params=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - estimators_=getattr(self._clf, 'estimators_', None), - classes_=getattr(self._clf, 'classes_', None), - n_classes_=getattr(self._clf, 'n_classes_', None), - estimator_weights_=getattr(self._clf, 'estimator_weights_', None), - estimator_errors_=getattr(self._clf, 'estimator_errors_', None), - base_estimator_=getattr(self._clf, 'base_estimator_', None), - estimator_params=getattr(self._clf, 'estimator_params', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.estimators_ = params['estimators_'] - self._clf.classes_ = params['classes_'] - self._clf.n_classes_ = params['n_classes_'] - self._clf.estimator_weights_ = params['estimator_weights_'] - self._clf.estimator_errors_ = params['estimator_errors_'] - self._clf.base_estimator_ = params['base_estimator_'] - self._clf.estimator_params = params['estimator_params'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = 
params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['estimators_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['n_classes_'] is not None: - self._fitted = True - if params['estimator_weights_'] is not None: - self._fitted = True - if params['estimator_errors_'] is not None: - self._fitted = True - if params['base_estimator_'] is not None: - self._fitted = True - if params['estimator_params'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. 
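# Editorial sketch, not part of the original patch: a self-contained illustration of
# the indexing trick used just below. String class labels are mapped to the
# learner's internal column indices, then numpy fancy indexing picks one
# log-probability per row.
import numpy
import pandas

log_proba = numpy.log(numpy.array([[0.9, 0.1], [0.2, 0.8]]))   # 2 samples x 2 classes
classes = numpy.array(['cat', 'dog'])
outputs_column = pandas.Series(['cat', 'dog'])                 # true label per sample
classes_map = pandas.Series(numpy.arange(len(classes)), index=classes)
mapped_outputs_column = outputs_column.map(classes_map)        # [0, 1]
picked = log_proba[numpy.arange(2), mapped_outputs_column]     # log P(true class) per row
assert picked[0] == log_proba[0, 0] and picked[1] == log_proba[1, 1]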
- log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - def produce_feature_importances(self, *, timeout: float = None, iterations: int = None) -> CallResult[d3m_dataframe]: - output = d3m_dataframe(self._clf.feature_importances_.reshape((1, len(self._input_column_names)))) - output.columns = self._input_column_names - for i in range(len(self._input_column_names)): - output.metadata = output.metadata.update_column(i, {"name": self._input_column_names[i]}) - return CallResult(output) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 
'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKAdaBoostClassifier.__doc__ = AdaBoostClassifier.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKAdaBoostRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKAdaBoostRegressor.py deleted file mode 100644 index bf06e54..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKAdaBoostRegressor.py +++ /dev/null @@ -1,437 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray 
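# Editorial illustration, not part of the original patch, of the docstring-forwarding
# idiom used at the end of SKAdaBoostClassifier.py above
# (SKAdaBoostClassifier.__doc__ = AdaBoostClassifier.__doc__): the generated wrapper
# simply inherits the wrapped estimator's documentation verbatim.
class Wrapped:
    """Original estimator docstring."""

class Wrapper:
    pass

Wrapper.__doc__ = Wrapped.__doc__
assert Wrapper.__doc__ == "Original estimator docstring."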
-from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.ensemble.weight_boosting import AdaBoostRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - estimators_: Optional[List[sklearn.tree.DecisionTreeRegressor]] - estimator_weights_: Optional[ndarray] - estimator_errors_: Optional[ndarray] - estimator_params: Optional[tuple] - base_estimator_: Optional[object] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - base_estimator = hyperparams.Constant( - default=None, - description='The base estimator from which the boosted ensemble is built. Support for sample weighting is required.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_estimators = hyperparams.Bounded[int]( - lower=1, - upper=None, - default=50, - description='The maximum number of estimators at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - learning_rate = hyperparams.Uniform( - lower=0.01, - upper=2, - default=0.1, - description='Learning rate shrinks the contribution of each regressor by ``learning_rate``. There is a trade-off between ``learning_rate`` and ``n_estimators``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - loss = hyperparams.Enumeration[str]( - values=['linear', 'square', 'exponential'], - default='linear', - description='The loss function to use when updating the weights after each boosting iteration.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. 
Applicable only if \"use_columns\" is not provided.",
-    )
-    exclude_outputs_columns = hyperparams.Set(
-        elements=hyperparams.Hyperparameter[int](-1),
-        default=(),
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.",
-    )
-    return_result = hyperparams.Enumeration(
-        values=['append', 'replace', 'new'],
-        default='new',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
-    )
-    use_semantic_types = hyperparams.UniformBool(
-        default=False,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
-    )
-    add_index_columns = hyperparams.UniformBool(
-        default=False,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
-    )
-    error_on_no_input = hyperparams.UniformBool(
-        default=True,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
-    )
-
-    return_semantic_type = hyperparams.Enumeration[str](
-        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
-        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
-        description='Decides what semantic type to attach to generated output',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
-    )
-
-class SKAdaBoostRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
-    """
-    Primitive wrapping for sklearn AdaBoostRegressor
-    `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostRegressor.html>`_
-
-    """
-
-    __author__ = "JPL MARVIN"
-    metadata = metadata_base.PrimitiveMetadata({
-        "algorithm_types": [metadata_base.PrimitiveAlgorithmType.ADABOOST, ],
-        "name": "sklearn.ensemble.weight_boosting.AdaBoostRegressor",
-        "primitive_family": metadata_base.PrimitiveFamily.REGRESSION,
-        "python_path": "d3m.primitives.regression.ada_boost.SKlearn",
-        "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostRegressor.html']},
-        "version": "2019.11.13",
-        "id": "6cab1537-02e1-4dc4-9ebb-53fa2cbabedd",
-        "hyperparams_to_tune": ['learning_rate', 'n_estimators'],
-        'installation': [
-            {'type': metadata_base.PrimitiveInstallationType.PIP,
-             'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format(
-                 git_commit=utils.current_git_commit(os.path.dirname(__file__)),
-             ),
-            }]
-    })
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams,
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] =
None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = AdaBoostRegressor( - base_estimator=self.hyperparams['base_estimator'], - n_estimators=self.hyperparams['n_estimators'], - learning_rate=self.hyperparams['learning_rate'], - loss=self.hyperparams['loss'], - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - estimators_=None, - estimator_weights_=None, - estimator_errors_=None, - estimator_params=None, - base_estimator_=None, - 
input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - estimators_=getattr(self._clf, 'estimators_', None), - estimator_weights_=getattr(self._clf, 'estimator_weights_', None), - estimator_errors_=getattr(self._clf, 'estimator_errors_', None), - estimator_params=getattr(self._clf, 'estimator_params', None), - base_estimator_=getattr(self._clf, 'base_estimator_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.estimators_ = params['estimators_'] - self._clf.estimator_weights_ = params['estimator_weights_'] - self._clf.estimator_errors_ = params['estimator_errors_'] - self._clf.estimator_params = params['estimator_params'] - self._clf.base_estimator_ = params['base_estimator_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['estimators_'] is not None: - self._fitted = True - if params['estimator_weights_'] is not None: - self._fitted = True - if params['estimator_errors_'] is not None: - self._fitted = True - if params['estimator_params'] is not None: - self._fitted = True - if params['base_estimator_'] is not None: - self._fitted = True - - - - - - def produce_feature_importances(self, *, timeout: float = None, iterations: int = None) -> CallResult[d3m_dataframe]: - output = d3m_dataframe(self._clf.feature_importances_.reshape((1, len(self._input_column_names)))) - output.columns = self._input_column_names - for i in range(len(self._input_column_names)): - output.metadata = output.metadata.update_column(i, {"name": self._input_column_names[i]}) - return CallResult(output) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - 
cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",])
-            add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",])
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            column_metadata.pop("structural_type", None)
-            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)
-
-        return outputs_metadata
-
-    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
-        outputs = d3m_dataframe(predictions, generate_metadata=False)
-        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata)
-        return outputs
-
-
-    @classmethod
-    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata):
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict()
-            semantic_types = []
-            semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget')
-            column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name")
-            if column_name is None:
-                column_name = "output_{}".format(column_index)
-            column_metadata["semantic_types"] = semantic_types
-            column_metadata["name"] = str(column_name)
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-
-SKAdaBoostRegressor.__doc__ = AdaBoostRegressor.__doc__
\ No newline at end of file
diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKBaggingClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKBaggingClassifier.py
deleted file mode 100644
index c875434..0000000
--- a/common-primitives/sklearn-wrap/sklearn_wrap/SKBaggingClassifier.py
+++ /dev/null
@@ -1,589 +0,0 @@
-from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
-from numpy import ndarray
-from collections import OrderedDict
-from scipy import sparse
-import os
-import sklearn
-import numpy
-import typing
-
-# Custom import commands if any
-from sklearn.ensemble.bagging import BaggingClassifier
-
-
-from d3m.container.numpy import ndarray as d3m_ndarray
-from d3m.container import DataFrame as d3m_dataframe
-from d3m.metadata import hyperparams, params, base as metadata_base
-from d3m import utils
-from d3m.base import utils as base_utils
-from d3m.exceptions import PrimitiveNotFittedError
-from d3m.primitive_interfaces.base import CallResult, DockerContainer
-
-from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase
-from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin
-from d3m import exceptions
-import pandas
-
-
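For orientation, here is a minimal sketch of how a wrapper like the
SKAdaBoostRegressor above is driven through the d3m primitive interface. It
assumes a working d3m and sklearn_wrap installation; the toy frame and column
names are illustrative only, not taken from the repository:

    import pandas
    from d3m.container import DataFrame as d3m_dataframe
    from sklearn_wrap.SKAdaBoostRegressor import SKAdaBoostRegressor, Hyperparams

    # Toy training data; generate_metadata=True attaches d3m column metadata.
    inputs = d3m_dataframe(pandas.DataFrame({'x1': [1.0, 2.0, 3.0], 'x2': [0.5, 0.1, 0.9]}),
                           generate_metadata=True)
    outputs = d3m_dataframe(pandas.DataFrame({'y': [1.4, 2.2, 3.6]}),
                            generate_metadata=True)

    # With the defaults above (use_semantic_types=False), all columns are used as-is.
    primitive = SKAdaBoostRegressor(hyperparams=Hyperparams.defaults())
    primitive.set_training_data(inputs=inputs, outputs=outputs)
    primitive.fit()
    predictions = primitive.produce(inputs=inputs).value  # d3m DataFrame of predictions

With use_semantic_types=True instead, the _get_columns_to_fit/_get_targets
helpers below would first filter columns by their Attribute/TrueTarget
semantic types rather than taking the frames wholesale.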
- -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - base_estimator_: Optional[object] - estimators_: Optional[List[sklearn.tree.DecisionTreeClassifier]] - estimators_features_: Optional[List[ndarray]] - classes_: Optional[ndarray] - n_classes_: Optional[int] - oob_score_: Optional[float] - oob_decision_function_: Optional[List[ndarray]] - n_features_: Optional[int] - _max_features: Optional[int] - _max_samples: Optional[int] - _n_samples: Optional[int] - _seeds: Optional[ndarray] - estimator_params: Optional[tuple] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_estimators = hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='The number of base estimators in the ensemble.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_samples = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=1.0, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='percent', - description='The number of samples to draw from X to train each base estimator. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=1.0, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='percent', - description='The number of features to draw from X to train each base estimator. - If int, then draw `max_features` features. - If float, then draw `max_features * X.shape[1]` features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - bootstrap = hyperparams.Enumeration[str]( - values=['bootstrap', 'bootstrap_with_oob_score', 'disabled'], - default='bootstrap', - description='Whether bootstrap samples are used when building trees.' - ' And whether to use out-of-bag samples to estimate the generalization accuracy.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - bootstrap_features = hyperparams.UniformBool( - default=False, - description='Whether features are drawn with replacement.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to True, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new ensemble. .. 
versionadded:: 0.17 *warm_start* constructor parameter.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of jobs to run in parallel for both `fit` and `predict`. If -1, then the number of jobs is set to the number of cores.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKBaggingClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn BaggingClassifier - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.ENSEMBLE_LEARNING, ], - "name": "sklearn.ensemble.bagging.BaggingClassifier", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.bagging.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html']}, - "version": "2019.11.13", - "id": "1b2a32a6-0ec5-3ca0-9386-b8b1f1b831d1", - "hyperparams_to_tune": ['n_estimators', 'max_samples', 'max_features'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = BaggingClassifier( - n_estimators=self.hyperparams['n_estimators'], - max_samples=self.hyperparams['max_samples'], - max_features=self.hyperparams['max_features'], - bootstrap=self.hyperparams['bootstrap'] in ['bootstrap', 'bootstrap_with_oob_score'], - bootstrap_features=self.hyperparams['bootstrap_features'], - oob_score=self.hyperparams['bootstrap'] in ['bootstrap_with_oob_score'], - warm_start=self.hyperparams['warm_start'], - n_jobs=self.hyperparams['n_jobs'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = 
self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - base_estimator_=None, - estimators_=None, - estimators_features_=None, - classes_=None, - n_classes_=None, - oob_score_=None, - oob_decision_function_=None, - n_features_=None, - _max_features=None, - _max_samples=None, - _n_samples=None, - _seeds=None, - estimator_params=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - base_estimator_=getattr(self._clf, 'base_estimator_', None), - estimators_=getattr(self._clf, 'estimators_', None), - estimators_features_=getattr(self._clf, 'estimators_features_', None), - classes_=getattr(self._clf, 'classes_', None), - n_classes_=getattr(self._clf, 'n_classes_', None), - oob_score_=getattr(self._clf, 'oob_score_', None), - oob_decision_function_=getattr(self._clf, 'oob_decision_function_', None), - n_features_=getattr(self._clf, 'n_features_', None), - _max_features=getattr(self._clf, '_max_features', None), - _max_samples=getattr(self._clf, '_max_samples', None), - _n_samples=getattr(self._clf, '_n_samples', None), - _seeds=getattr(self._clf, '_seeds', None), - estimator_params=getattr(self._clf, 'estimator_params', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - 
target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.base_estimator_ = params['base_estimator_'] - self._clf.estimators_ = params['estimators_'] - self._clf.estimators_features_ = params['estimators_features_'] - self._clf.classes_ = params['classes_'] - self._clf.n_classes_ = params['n_classes_'] - self._clf.oob_score_ = params['oob_score_'] - self._clf.oob_decision_function_ = params['oob_decision_function_'] - self._clf.n_features_ = params['n_features_'] - self._clf._max_features = params['_max_features'] - self._clf._max_samples = params['_max_samples'] - self._clf._n_samples = params['_n_samples'] - self._clf._seeds = params['_seeds'] - self._clf.estimator_params = params['estimator_params'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['base_estimator_'] is not None: - self._fitted = True - if params['estimators_'] is not None: - self._fitted = True - if params['estimators_features_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['n_classes_'] is not None: - self._fitted = True - if params['oob_score_'] is not None: - self._fitted = True - if params['oob_decision_function_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['_max_features'] is not None: - self._fitted = True - if params['_max_samples'] is not None: - self._fitted = True - if params['_n_samples'] is not None: - self._fitted = True - if params['_seeds'] is not None: - self._fitted = True - if params['estimator_params'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. 
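# (The append below relies on numpy fancy indexing: log_proba[k] has shape
# (n_samples, n_classes_k), and indexing it with the pair
# (numpy.arange(samples_length), mapped_outputs_column) picks, for each row i,
# the column holding the observed class of sample i, i.e. the log-likelihood
# of the given label under the fitted classifier.)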
- log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, 
-                                     hyperparams) -> List[OrderedDict]:
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict(outputs_metadata.query_column(column_index))
-
-            # Update semantic types and prepare it for predicted targets.
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",])
-            add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",])
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            column_metadata.pop("structural_type", None)
-            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)
-
-        return outputs_metadata
-
-    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
-        outputs = d3m_dataframe(predictions, generate_metadata=False)
-        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata)
-        return outputs
-
-
-    @classmethod
-    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata):
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict()
-            semantic_types = []
-            semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget')
-            column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name")
-            if column_name is None:
-                column_name = "output_{}".format(column_index)
-            column_metadata["semantic_types"] = semantic_types
-            column_metadata["name"] = str(column_name)
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-
-SKBaggingClassifier.__doc__ = BaggingClassifier.__doc__
\ No newline at end of file
diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKBaggingRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKBaggingRegressor.py
deleted file mode 100644
index 7a62c7b..0000000
--- a/common-primitives/sklearn-wrap/sklearn_wrap/SKBaggingRegressor.py
+++ /dev/null
@@ -1,533 +0,0 @@
-from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
-from numpy import ndarray
-from collections import OrderedDict
-from scipy import sparse
-import os
-import sklearn
-import numpy
-import typing
-
-# Custom import commands if any
-from sklearn.ensemble.bagging import BaggingRegressor
-
-
-from d3m.container.numpy import ndarray as d3m_ndarray
-from d3m.container import DataFrame as d3m_dataframe
-from d3m.metadata import hyperparams, params, base as metadata_base
-from d3m import utils
-from d3m.base import utils as
base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - estimators_: Optional[List[sklearn.tree.DecisionTreeRegressor]] - estimators_features_: Optional[List[ndarray]] - oob_score_: Optional[float] - oob_prediction_: Optional[ndarray] - base_estimator_: Optional[object] - n_features_: Optional[int] - _max_features: Optional[int] - _max_samples: Optional[int] - _n_samples: Optional[int] - _seeds: Optional[ndarray] - estimator_params: Optional[tuple] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - base_estimator = hyperparams.Constant( - default=None, - description='The base estimator to fit on random subsets of the dataset. If None, then the base estimator is a decision tree.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_estimators = hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='The number of base estimators in the ensemble.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_samples = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=1.0, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='percent', - description='The number of samples to draw from X to train each base estimator. - If int, then draw `max_samples` samples. - If float, then draw `max_samples * X.shape[0]` samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=1.0, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='percent', - description='The number of features to draw from X to train each base estimator. - If int, then draw `max_features` features. - If float, then draw `max_features * X.shape[1]` features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - bootstrap = hyperparams.Enumeration[str]( - values=['bootstrap', 'bootstrap_with_oob_score', 'disabled'], - default='bootstrap', - description='Whether bootstrap samples are used when building trees.' 
- ' And whether to use out-of-bag samples to estimate the generalization accuracy.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - bootstrap_features = hyperparams.UniformBool( - default=False, - description='Whether features are drawn with replacement.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to True, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new ensemble. See :term:`the Glossary `.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of jobs to run in parallel for both `fit` and `predict`. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. 
Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKBaggingRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn BaggingRegressor - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.ENSEMBLE_LEARNING, ], - "name": "sklearn.ensemble.bagging.BaggingRegressor", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.bagging.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingRegressor.html']}, - "version": "2019.11.13", - "id": "0dbc4b6d-aa57-4f11-ab18-36125880151b", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = BaggingRegressor( - base_estimator=self.hyperparams['base_estimator'], - n_estimators=self.hyperparams['n_estimators'], - max_samples=self.hyperparams['max_samples'], - max_features=self.hyperparams['max_features'], - bootstrap=self.hyperparams['bootstrap'] in ['bootstrap', 'bootstrap_with_oob_score'], - bootstrap_features=self.hyperparams['bootstrap_features'], - oob_score=self.hyperparams['bootstrap'] in ['bootstrap_with_oob_score'], - warm_start=self.hyperparams['warm_start'], - n_jobs=self.hyperparams['n_jobs'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - 
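# (On the BaggingRegressor construction above: the single 'bootstrap'
# enumeration fans out into two sklearn arguments. Both 'bootstrap' and
# 'bootstrap_with_oob_score' enable bootstrap sampling, while only the latter
# also sets oob_score=True; 'disabled' turns both off. This keeps the invalid
# sklearn combination oob_score=True with bootstrap=False unrepresentable in
# the hyperparameter space.)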
self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - estimators_=None, - estimators_features_=None, - oob_score_=None, - oob_prediction_=None, - base_estimator_=None, - n_features_=None, - _max_features=None, - _max_samples=None, - _n_samples=None, - _seeds=None, - estimator_params=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - estimators_=getattr(self._clf, 'estimators_', None), - estimators_features_=getattr(self._clf, 'estimators_features_', None), - oob_score_=getattr(self._clf, 'oob_score_', None), - oob_prediction_=getattr(self._clf, 'oob_prediction_', None), - base_estimator_=getattr(self._clf, 'base_estimator_', None), - n_features_=getattr(self._clf, 
'n_features_', None), - _max_features=getattr(self._clf, '_max_features', None), - _max_samples=getattr(self._clf, '_max_samples', None), - _n_samples=getattr(self._clf, '_n_samples', None), - _seeds=getattr(self._clf, '_seeds', None), - estimator_params=getattr(self._clf, 'estimator_params', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.estimators_ = params['estimators_'] - self._clf.estimators_features_ = params['estimators_features_'] - self._clf.oob_score_ = params['oob_score_'] - self._clf.oob_prediction_ = params['oob_prediction_'] - self._clf.base_estimator_ = params['base_estimator_'] - self._clf.n_features_ = params['n_features_'] - self._clf._max_features = params['_max_features'] - self._clf._max_samples = params['_max_samples'] - self._clf._n_samples = params['_n_samples'] - self._clf._seeds = params['_seeds'] - self._clf.estimator_params = params['estimator_params'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['estimators_'] is not None: - self._fitted = True - if params['estimators_features_'] is not None: - self._fitted = True - if params['oob_score_'] is not None: - self._fitted = True - if params['oob_prediction_'] is not None: - self._fitted = True - if params['base_estimator_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['_max_features'] is not None: - self._fitted = True - if params['_max_samples'] is not None: - self._fitted = True - if params['_n_samples'] is not None: - self._fitted = True - if params['_seeds'] is not None: - self._fitted = True - if params['estimator_params'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") 
- return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",])
-            add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",])
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            column_metadata.pop("structural_type", None)
-            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)
-
-        return outputs_metadata
-
-    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
-        outputs = d3m_dataframe(predictions, generate_metadata=False)
-        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata)
-        return outputs
-
-
-    @classmethod
-    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata):
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict()
-            semantic_types = []
-            semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget')
-            column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name")
-            if column_name is None:
-                column_name = "output_{}".format(column_index)
-            column_metadata["semantic_types"] = semantic_types
-            column_metadata["name"] = str(column_name)
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-
-SKBaggingRegressor.__doc__ = BaggingRegressor.__doc__
\ No newline at end of file
diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKBernoulliNB.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKBernoulliNB.py
deleted file mode 100644
index 40dde7e..0000000
--- a/common-primitives/sklearn-wrap/sklearn_wrap/SKBernoulliNB.py
+++ /dev/null
@@ -1,508 +0,0 @@
-from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
-from numpy import ndarray
-from collections import OrderedDict
-from scipy import sparse
-import os
-import sklearn
-import numpy
-import typing
-
-# Custom import commands if any
-from sklearn.naive_bayes import BernoulliNB
-
-
-from d3m.container.numpy import ndarray as d3m_ndarray
-from d3m.container import DataFrame as d3m_dataframe
-from d3m.metadata import hyperparams, params, base as metadata_base
-from d3m import utils
-from d3m.base import utils as base_utils
-from d3m.exceptions import PrimitiveNotFittedError
-from d3m.primitive_interfaces.base import CallResult, DockerContainer
-
-from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase
-from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin
-from d3m import exceptions
-import pandas
-
-
-
-Inputs = d3m_dataframe
-Outputs = d3m_dataframe - - -class Params(params.Params): - class_log_prior_: Optional[ndarray] - feature_log_prob_: Optional[ndarray] - class_count_: Optional[ndarray] - feature_count_: Optional[ndarray] - classes_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - alpha = hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - description='Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - binarize = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='float', - description='Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_prior = hyperparams.UniformBool( - default=True, - description='Whether to learn class prior probabilities or not. If false, a uniform prior will be used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKBernoulliNB(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams], - ContinueFitMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn BernoulliNB - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.NAIVE_BAYES_CLASSIFIER, ], - "name": "sklearn.naive_bayes.BernoulliNB", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.bernoulli_naive_bayes.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.BernoulliNB.html']}, - "version": "2019.11.13", - "id": "dfb1004e-02ac-3399-ba57-8a95639312cd", - "hyperparams_to_tune": ['alpha', 'binarize', 'fit_prior'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = BernoulliNB( - alpha=self.hyperparams['alpha'], - binarize=self.hyperparams['binarize'], - fit_prior=self.hyperparams['fit_prior'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - 
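        # Editorial sketch (assumptions flagged, not part of the original
        # file): the constructor above simply forwards hyperparameter values
        # into the underlying sklearn estimator. Outside d3m, the equivalent
        # direct usage would be:
        #
        #   from sklearn.naive_bayes import BernoulliNB
        #   clf = BernoulliNB(alpha=1.0, binarize=0.0, fit_prior=True)
        #   clf.fit([[0, 1], [1, 0], [1, 1]], [0, 1, 1])
        #   clf.predict([[1, 0]])          # -> array([1])
        #
        # Note that `partial_fit` (used by `continue_fit` below) needs the
        # full class list on its first call:
        #   clf.partial_fit(X, y, classes=[0, 1])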
self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def continue_fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._training_inputs is None or self._training_outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.partial_fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - 
inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - class_log_prior_=None, - feature_log_prob_=None, - class_count_=None, - feature_count_=None, - classes_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - class_log_prior_=getattr(self._clf, 'class_log_prior_', None), - feature_log_prob_=getattr(self._clf, 'feature_log_prob_', None), - class_count_=getattr(self._clf, 'class_count_', None), - feature_count_=getattr(self._clf, 'feature_count_', None), - classes_=getattr(self._clf, 'classes_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.class_log_prior_ = params['class_log_prior_'] - self._clf.feature_log_prob_ = params['feature_log_prob_'] - self._clf.class_count_ = params['class_count_'] - self._clf.feature_count_ = params['feature_count_'] - self._clf.classes_ = params['classes_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['class_log_prior_'] is not None: - self._fitted = True - if params['feature_log_prob_'] is not None: - self._fitted = True - if params['class_count_'] is not None: - self._fitted = True - if params['feature_count_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. 
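            # Editorial sketch (not part of the original file): the append
            # below relies on NumPy integer fancy indexing to pick exactly one
            # log-probability per row. For example:
            #
            #   import numpy
            #   log_proba_k = numpy.log([[0.9, 0.1],
            #                            [0.2, 0.8]])
            #   chosen = numpy.array([0, 1])   # mapped class index per sample
            #   log_proba_k[numpy.arange(2), chosen]
            #   # -> array([log(0.9), log(0.8)])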
- log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, 
hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKBernoulliNB.__doc__ = BernoulliNB.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKBinarizer.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKBinarizer.py deleted file mode 100644 index 7d1166e..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKBinarizer.py +++ /dev/null @@ -1,330 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.preprocessing.data import Binarizer - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import 
PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - threshold = hyperparams.Bounded[float]( - default=0.0, - lower=0.0, - upper=None, - description='Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKBinarizer(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn Binarizer - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.FEATURE_SCALING, ], - "name": "sklearn.preprocessing.data.Binarizer", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.binarizer.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Binarizer.html']}, - "version": "2019.11.13", - "id": "13777068-9dc0-3c5b-b4da-99350d67ee3f", - "hyperparams_to_tune": ['threshold'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = Binarizer( - threshold=self.hyperparams['threshold'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - 
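            # Editorial sketch (not part of the original file): the sparse
            # guard above matters because Binarizer.transform keeps sparse
            # inputs sparse; densifying lets the result be wrapped in a
            # DataFrame. Using the public sklearn import path:
            #
            #   from scipy import sparse
            #   from sklearn.preprocessing import Binarizer
            #   m = sparse.csr_matrix([[0.2, 1.5], [3.0, 0.0]])
            #   out = Binarizer(threshold=1.0).transform(m)  # still sparse
            #   out.toarray()  # -> array([[0., 1.], [1., 0.]])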
outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic 
types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKBinarizer.__doc__ = Binarizer.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKCountVectorizer.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKCountVectorizer.py deleted file mode 100644 index 264c92f..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKCountVectorizer.py +++ /dev/null @@ -1,490 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import 
OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.feature_extraction.text import CountVectorizer - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase -from d3m.metadata.base import ALL_ELEMENTS - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - vocabulary_: Optional[Sequence[dict]] - stop_words_: Optional[Sequence[set]] - fixed_vocabulary_: Optional[Sequence[bool]] - _stop_words_id: Optional[Sequence[int]] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - - -class Hyperparams(hyperparams.Hyperparams): - strip_accents = hyperparams.Union( - configuration=OrderedDict({ - 'accents': hyperparams.Enumeration[str]( - default='ascii', - values=['ascii', 'unicode'], - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Remove accents during the preprocessing step. \'ascii\' is a fast method that only works on characters that have an direct ASCII mapping. \'unicode\' is a slightly slower method that works on any characters. None (default) does nothing.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - analyzer = hyperparams.Enumeration[str]( - default='word', - values=['word', 'char', 'char_wb'], - description='Whether the feature should be made of word or character n-grams. Option \'char_wb\' creates character n-grams only from text inside word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - ngram_range = hyperparams.SortedList( - elements=hyperparams.Bounded[int](1, None, 1), - default=(1, 1), - min_size=2, - max_size=2, - description='The lower and upper boundary of the range of n-values for different n-grams to be extracted. All values of n such that min_n <= n <= max_n will be used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - stop_words = hyperparams.Union( - configuration=OrderedDict({ - 'string': hyperparams.Hyperparameter[str]( - default='english', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'list': hyperparams.List( - elements=hyperparams.Hyperparameter[str](''), - default=[], - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='If \'english\', a built-in stop word list for English is used. If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == \'word\'``. 
If None, no stop words will be used. max_df can be set to a value in the range [0.7, 1.0) to automatically detect and filter stop words based on intra corpus document frequency of terms.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - lowercase = hyperparams.UniformBool( - default=True, - description='Convert all characters to lowercase before tokenizing.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - token_pattern = hyperparams.Hyperparameter[str]( - default='(?u)\\b\w\w+\\b', - description='Regular expression denoting what constitutes a "token", only used if ``analyzer == \'word\'``. The default regexp select tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_df = hyperparams.Union( - configuration=OrderedDict({ - 'proportion': hyperparams.Bounded[float]( - default=1.0, - lower=0.0, - upper=1.0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='proportion', - description='When building the vocabulary ignore terms that have a document frequency strictly higher than the given threshold (corpus-specific stop words). If float, the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_df = hyperparams.Union( - configuration=OrderedDict({ - 'proportion': hyperparams.Bounded[float]( - default=1.0, - lower=0.0, - upper=1.0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold. This value is also called cut-off in the literature. If float, the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='If not None, build a vocabulary that only consider the top max_features ordered by term frequency across the corpus. This parameter is ignored if vocabulary is not None.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - binary = hyperparams.UniformBool( - default=False, - description='If True, all non zero counts are set to 1. 
This is useful for discrete probabilistic models that model binary events rather than integer counts.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - - -class SKCountVectorizer(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn CountVectorizer - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.MINIMUM_REDUNDANCY_FEATURE_SELECTION, ], - "name": "sklearn.feature_extraction.text.CountVectorizer", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.count_vectorizer.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.CountVectorizer.html']}, - "version": "2019.11.13", - "id": "0609859b-8ed9-397f-ac7a-7c4f63863560", - "hyperparams_to_tune": ['max_df', 'min_df'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # True - - self._clf = list() - - self._training_inputs = None - self._target_names = None - self._training_indices = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - - if self._training_inputs is None: - raise ValueError("Missing training data.") - - if len(self._training_indices) > 0: - for column_index in range(len(self._training_inputs.columns)): - clf = self._create_new_sklearn_estimator() - clf.fit(self._training_inputs.iloc[:, column_index]) - self._clf.append(clf) - - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs, training_indices = self._get_columns_to_fit(inputs, self.hyperparams) - else: - training_indices = list(range(len(inputs))) - - # Iterating over all estimators and call transform on them. - # No. 
of estimators should be equal to the number of columns in the input - if len(self._clf) != len(sk_inputs.columns): - raise RuntimeError("Input data does not have the same number of columns as training data") - outputs = [] - if len(self._training_indices) > 0: - for column_index in range(len(sk_inputs.columns)): - clf = self._clf[column_index] - output = clf.transform(sk_inputs.iloc[:, column_index]) - column_name = sk_inputs.columns[column_index] - - if sparse.issparse(output): - output = output.toarray() - output = self._wrap_predictions(inputs, output) - - # Updating column names. - output.columns = map(lambda x: "{}_{}".format(column_name, x), clf.get_feature_names()) - for i, name in enumerate(clf.get_feature_names()): - output.metadata = output.metadata.update((ALL_ELEMENTS, i), {'name': name}) - - outputs.append(output) - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=outputs) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - vocabulary_=None, - stop_words_=None, - fixed_vocabulary_=None, - _stop_words_id=None, - training_indices_=self._training_indices, - target_names_=self._target_names - ) - - return Params( - vocabulary_=list(map(lambda clf: getattr(clf, 'vocabulary_', None), self._clf)), - stop_words_=list(map(lambda clf: getattr(clf, 'stop_words_', None), self._clf)), - fixed_vocabulary_=list(map(lambda clf: getattr(clf, 'fixed_vocabulary_', None), self._clf)), - _stop_words_id=list(map(lambda clf: getattr(clf, '_stop_words_id', None), self._clf)), - training_indices_=self._training_indices, - target_names_=self._target_names - ) - - def set_params(self, *, params: Params) -> None: - for param, val in params.items(): - if val is not None and param not in ['target_names_', 'training_indices_']: - self._clf = list(map(lambda x: self._create_new_sklearn_estimator(), val)) - break - for index in range(len(self._clf)): - for param, val in params.items(): - if val is not None: - setattr(self._clf[index], param, val[index]) - else: - setattr(self._clf[index], param, None) - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._fitted = False - - if params['vocabulary_'] is not None: - self._fitted = True - if params['stop_words_'] is not None: - self._fitted = True - if params['fixed_vocabulary_'] is not None: - self._fitted = True - if params['_stop_words_id'] is not None: - self._fitted = True - - def _create_new_sklearn_estimator(self): - clf = CountVectorizer( - strip_accents=self.hyperparams['strip_accents'], - analyzer=self.hyperparams['analyzer'], - ngram_range=self.hyperparams['ngram_range'], - stop_words=self.hyperparams['stop_words'], - lowercase=self.hyperparams['lowercase'], - token_pattern=self.hyperparams['token_pattern'], - max_df=self.hyperparams['max_df'], - min_df=self.hyperparams['min_df'], - max_features=self.hyperparams['max_features'], - binary=self.hyperparams['binary'], - ) - return clf - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def 
can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (str,) - accepted_semantic_types = set(["http://schema.org/Text",]) - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), [] - target_names = [] - target_semantic_type = [] - target_column_indices = [] - metadata = data.metadata - target_column_indices.extend(metadata.get_columns_with_semantic_type('https://metadata.datadrivendiscovery.org/types/TrueTarget')) - - for column_index in target_column_indices: - if column_index is metadata_base.ALL_ELEMENTS: - continue - column_index = typing.cast(metadata_base.SimpleSelectorSegment, column_index) - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - target_names.append(column_metadata.get('name', str(column_index))) - target_semantic_type.append(column_metadata.get('semantic_types', [])) - - targets = data.iloc[:, target_column_indices] - return targets, target_names, target_semantic_type - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
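            # Editorial note (hypothetical correction, mirroring the sibling
            # wrappers in this package): as written below,
            # ``add_semantic_types`` is built as a list, but ``list`` has no
            # ``.add`` method, so this branch would raise AttributeError if
            # reached. The intended form is a set:
            #
            #   add_semantic_types = set()
            #   add_semantic_types.add(hyperparams["return_semantic_type"])
            #   semantic_types = (semantic_types - semantic_types_to_remove) \
            #                    | add_semantic_types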
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._add_target_columns_metadata(outputs.metadata) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/Attribute') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKCountVectorizer.__doc__ = CountVectorizer.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKDecisionTreeClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKDecisionTreeClassifier.py deleted file mode 100644 index 46d060a..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKDecisionTreeClassifier.py +++ /dev/null @@ -1,621 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.tree.tree import DecisionTreeClassifier -import numpy - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - classes_: 
Optional[Union[ndarray, List[ndarray]]] - max_features_: Optional[int] - n_classes_: Optional[Union[numpy.int64, List[numpy.int64]]] - n_features_: Optional[int] - n_outputs_: Optional[int] - tree_: Optional[object] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - criterion = hyperparams.Enumeration[str]( - values=['gini', 'entropy'], - default='gini', - description='The function to measure the quality of a split. Supported criteria are "gini" for the Gini impurity and "entropy" for the information gain.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - splitter = hyperparams.Enumeration[str]( - values=['best', 'random'], - default='best', - description='The strategy used to choose the split at each node. Supported strategies are "best" to choose the best split and "random" to choose the best random split.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_depth = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=10, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_split = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=2, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_leaf = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=0.5, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. 
versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_weight_fraction_leaf = hyperparams.Bounded[float]( - default=0, - lower=0, - upper=0.5, - description='The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_leaf_nodes = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'specified_int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'calculated': hyperparams.Enumeration[str]( - values=['auto', 'sqrt', 'log2'], - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_impurity_decrease = hyperparams.Bounded[float]( - default=0.0, - lower=0.0, - upper=None, - description='A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19 ', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - class_weight = hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Constant( - default='balanced', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. The "balanced" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - presort = hyperparams.UniformBool( - default=False, - description='Whether to presort the data to speed up the finding of best splits in fitting. For the default settings of a decision tree on large datasets, setting this to true may slow down the training process. When using either a smaller dataset or a restricted depth, this may speed up the training.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
This hyperparam is ignored if use_semantic_types is set to false.",
-    )
-    use_semantic_types = hyperparams.UniformBool(
-        default=False,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Controls whether semantic_types metadata will be used for filtering columns in the input dataframe. Setting this to false makes the code ignore return_result and produce only the output dataframe.",
-    )
-    add_index_columns = hyperparams.UniformBool(
-        default=False,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
-    )
-    error_on_no_input = hyperparams.UniformBool(
-        default=True,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking, set this to False.",
-    )
-
-    return_semantic_type = hyperparams.Enumeration[str](
-        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
-        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
-        description='Decides what semantic type to attach to generated output.',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
-    )
-
-class SKDecisionTreeClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams],
-                               ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]):
-    """
-    Primitive wrapping for sklearn DecisionTreeClassifier
-    `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html>`_
-
-    """
-
-    __author__ = "JPL MARVIN"
-    metadata = metadata_base.PrimitiveMetadata({
-        "algorithm_types": [metadata_base.PrimitiveAlgorithmType.DECISION_TREE, ],
-        "name": "sklearn.tree.tree.DecisionTreeClassifier",
-        "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION,
-        "python_path": "d3m.primitives.classification.decision_tree.SKlearn",
-        "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html']},
-        "version": "2019.11.13",
-        "id": "e20d003d-6a9f-35b0-b4b5-20e42b30282a",
-        "hyperparams_to_tune": ['max_depth', 'min_samples_split', 'min_samples_leaf', 'max_features'],
-        'installation': [
-            {'type': metadata_base.PrimitiveInstallationType.PIP,
-             'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format(
-                 git_commit=utils.current_git_commit(os.path.dirname(__file__)),
-             ),
-             }]
-    })
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams,
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None) -> None:
-
-        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-
-        # Build the underlying sklearn estimator from the primitive's hyperparams.
-        self._clf = DecisionTreeClassifier(
-            criterion=self.hyperparams['criterion'],
-            splitter=self.hyperparams['splitter'],
-            max_depth=self.hyperparams['max_depth'],
-            min_samples_split=self.hyperparams['min_samples_split'],
-            min_samples_leaf=self.hyperparams['min_samples_leaf'],
-            min_weight_fraction_leaf=self.hyperparams['min_weight_fraction_leaf'],
-            max_leaf_nodes=self.hyperparams['max_leaf_nodes'],
-            max_features=self.hyperparams['max_features'],
-            min_impurity_decrease=self.hyperparams['min_impurity_decrease'],
-            class_weight=self.hyperparams['class_weight'],
-            presort=self.hyperparams['presort'],
-            random_state=self.random_seed,
-        )
-
-        self._inputs = None
-        self._outputs = None
-        self._training_inputs = None
-        self._training_outputs = None
-        self._target_names = None
-        self._training_indices = None
-        self._target_column_indices = None
-        self._target_columns_metadata: List[OrderedDict] = None
-        self._input_column_names = None
-        self._fitted = False
-        self._new_training_data = False
-
-    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
-        self._inputs = inputs
-        self._outputs = outputs
-        self._fitted = False
-        self._new_training_data = True
-
-    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
-        if self._inputs is None or self._outputs is None:
-            raise ValueError("Missing training data.")
-
-        if not self._new_training_data:
-            return CallResult(None)
-        self._new_training_data = False
-
-        self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
-        self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams)
-        self._input_column_names = self._training_inputs.columns
-
-        if len(self._training_indices) > 0 and len(self._target_column_indices) > 0:
-            self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams)
-            sk_training_output = self._training_outputs.values
-
-            shape = sk_training_output.shape
-            if len(shape) == 2 and shape[1] == 1:
-                sk_training_output = numpy.ravel(sk_training_output)
-
-            self._clf.fit(self._training_inputs, sk_training_output)
-            self._fitted = True
-        else:
-            if self.hyperparams['error_on_no_input']:
-                raise RuntimeError("No input columns were selected")
-            self.logger.warning("No input columns were selected")
-
-        return CallResult(None)
-
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
-        sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams)
-        output = []
-        if len(sk_inputs.columns):
-            try:
-                sk_output = self._clf.predict(sk_inputs)
-            except sklearn.exceptions.NotFittedError as error:
-                raise PrimitiveNotFittedError("Primitive not fitted.") from error
-            # For primitives that allow predicting without fitting, like GaussianProcessRegressor.
-            if not self._fitted:
-                raise PrimitiveNotFittedError("Primitive not fitted.")
-            if sparse.issparse(sk_output):
-                sk_output = sk_output.toarray()
-            output = self._wrap_predictions(inputs, sk_output)
-            output.columns = self._target_names
-            output = [output]
-        else:
-            if self.hyperparams['error_on_no_input']:
-                raise RuntimeError("No input columns were selected")
-            self.logger.warning("No input columns were selected")
-        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
-                                             add_index_columns=self.hyperparams['add_index_columns'],
-                                             inputs=inputs, column_indices=self._target_column_indices,
-                                             columns_list=output)
-
-        return CallResult(outputs)
-
-    def get_params(self) -> Params:
-        if not self._fitted:
-            return Params(
-                classes_=None,
-                max_features_=None,
-                n_classes_=None,
-                n_features_=None,
-                n_outputs_=None,
-                tree_=None,
-                input_column_names=self._input_column_names,
-                training_indices_=self._training_indices,
-                target_names_=self._target_names,
target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - classes_=getattr(self._clf, 'classes_', None), - max_features_=getattr(self._clf, 'max_features_', None), - n_classes_=getattr(self._clf, 'n_classes_', None), - n_features_=getattr(self._clf, 'n_features_', None), - n_outputs_=getattr(self._clf, 'n_outputs_', None), - tree_=getattr(self._clf, 'tree_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.classes_ = params['classes_'] - self._clf.max_features_ = params['max_features_'] - self._clf.n_classes_ = params['n_classes_'] - self._clf.n_features_ = params['n_features_'] - self._clf.n_outputs_ = params['n_outputs_'] - self._clf.tree_ = params['tree_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['classes_'] is not None: - self._fitted = True - if params['max_features_'] is not None: - self._fitted = True - if params['n_classes_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['n_outputs_'] is not None: - self._fitted = True - if params['tree_'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. 
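Editor's note: the fancy-indexing step below is compact, so here is a minimal, self-contained sketch of the same trick (plain numpy/pandas with hypothetical toy labels, not part of the original wrapper). Labels are mapped to the learner's internal class indices so that one log-probability per row can be picked out of predict_log_proba's output:

import numpy
import pandas

log_proba = numpy.log(numpy.array([[0.7, 0.3],
                                   [0.2, 0.8]]))   # (n_samples, n_classes), as from predict_log_proba
classes = numpy.array(['cat', 'dog'])              # as in clf.classes_
outputs_column = pandas.Series(['dog', 'cat'])     # ground truth, possibly strings

# Map each label to the internal (numerical) index used by the learner.
classes_map = pandas.Series(numpy.arange(len(classes)), index=classes)
mapped_outputs_column = outputs_column.map(classes_map)            # -> [1, 0]

samples_length = len(outputs_column)
picked = log_proba[numpy.arange(samples_length), mapped_outputs_column]
# picked[i] is log P(true class of sample i); here log(0.3) and log(0.2).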
- log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - def produce_feature_importances(self, *, timeout: float = None, iterations: int = None) -> CallResult[d3m_dataframe]: - output = d3m_dataframe(self._clf.feature_importances_.reshape((1, len(self._input_column_names)))) - output.columns = self._input_column_names - for i in range(len(self._input_column_names)): - output.metadata = output.metadata.update_column(i, {"name": self._input_column_names[i]}) - return CallResult(output) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 
'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKDecisionTreeClassifier.__doc__ = DecisionTreeClassifier.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKDecisionTreeRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKDecisionTreeRegressor.py deleted file mode 100644 index 1886dd3..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKDecisionTreeRegressor.py +++ /dev/null @@ -1,565 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from 
numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.tree.tree import DecisionTreeRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - max_features_: Optional[int] - n_features_: Optional[int] - n_outputs_: Optional[int] - tree_: Optional[object] - classes_: Optional[Union[ndarray, List[ndarray]]] - n_classes_: Optional[Union[numpy.int64, List[numpy.int64]]] - class_weight: Optional[Union[str, dict, List[dict]]] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - criterion = hyperparams.Enumeration[str]( - values=['mse', 'friedman_mse', 'mae'], - default='mse', - description='The function to measure the quality of a split. Supported criteria are "mse" for the mean squared error, which is equal to variance reduction as feature selection criterion, and "mae" for the mean absolute error. .. versionadded:: 0.18 Mean Absolute Error (MAE) criterion.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - splitter = hyperparams.Enumeration[str]( - values=['best', 'random'], - default='best', - description='The strategy used to choose the split at each node. Supported strategies are "best" to choose the best split and "random" to choose the best random split.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_depth = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=5, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='The maximum depth of the tree. 
If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_split = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=1, - default=1.0, - description='It\'s a percentage and `ceil(min_samples_split * n_samples)` is the minimum number of samples for each split.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=2, - description='Minimum number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='int', - description='The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_leaf = hyperparams.Union( - configuration=OrderedDict({ - 'percent': hyperparams.Bounded[float]( - lower=0, - upper=0.5, - default=0.25, - description='It\'s a percentage and `ceil(min_samples_leaf * n_samples)` is the minimum number of samples for each node.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'absolute': hyperparams.Bounded[int]( - lower=1, - upper=None, - default=1, - description='Minimum number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_weight_fraction_leaf = hyperparams.Bounded[float]( - default=0, - lower=0, - upper=0.5, - description='The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_leaf_nodes = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=10, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. 
If None then unlimited number of leaf nodes.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'specified_int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'calculated': hyperparams.Enumeration[str]( - values=['auto', 'sqrt', 'log2'], - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='calculated', - description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=n_features`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_impurity_decrease = hyperparams.Bounded[float]( - default=0.0, - lower=0.0, - upper=None, - description='A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19 ', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - presort = hyperparams.UniformBool( - default=False, - description='Whether to presort the data to speed up the finding of best splits in fitting. For the default settings of a decision tree on large datasets, setting this to true may slow down the training process. When using either a smaller dataset or a restricted depth, this may speed up the training.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. 
If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking, set this to False.",
-    )
-
-    return_semantic_type = hyperparams.Enumeration[str](
-        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
-        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
-        description='Decides what semantic type to attach to generated output.',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
-    )
-
-class SKDecisionTreeRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
-    """
-    Primitive wrapping for sklearn DecisionTreeRegressor
-    `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html>`_
-
-    """
-
-    __author__ = "JPL MARVIN"
-    metadata = metadata_base.PrimitiveMetadata({
-        "algorithm_types": [metadata_base.PrimitiveAlgorithmType.DECISION_TREE, ],
-        "name": "sklearn.tree.tree.DecisionTreeRegressor",
-        "primitive_family": metadata_base.PrimitiveFamily.REGRESSION,
-        "python_path": "d3m.primitives.regression.decision_tree.SKlearn",
-        "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html']},
-        "version": "2019.11.13",
-        "id": "6c420bd8-01d1-321f-9a35-afc4b758a5c6",
-        "hyperparams_to_tune": ['max_depth', 'min_samples_split', 'min_samples_leaf', 'max_features'],
-        'installation': [
-            {'type': metadata_base.PrimitiveInstallationType.PIP,
-             'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format(
-                 git_commit=utils.current_git_commit(os.path.dirname(__file__)),
-             ),
-             }]
-    })
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams,
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None) -> None:
-
-        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-
-        # Build the underlying sklearn estimator from the primitive's hyperparams.
-        self._clf = DecisionTreeRegressor(
-            criterion=self.hyperparams['criterion'],
-            splitter=self.hyperparams['splitter'],
-            max_depth=self.hyperparams['max_depth'],
-            min_samples_split=self.hyperparams['min_samples_split'],
-            min_samples_leaf=self.hyperparams['min_samples_leaf'],
-            min_weight_fraction_leaf=self.hyperparams['min_weight_fraction_leaf'],
-            max_leaf_nodes=self.hyperparams['max_leaf_nodes'],
-            max_features=self.hyperparams['max_features'],
-            min_impurity_decrease=self.hyperparams['min_impurity_decrease'],
-            presort=self.hyperparams['presort'],
-            random_state=self.random_seed,
-        )
-
-        self._inputs = None
-        self._outputs = None
-        self._training_inputs = None
-        self._training_outputs = None
-        self._target_names = None
-        self._training_indices = None
-        self._target_column_indices = None
-        self._target_columns_metadata: List[OrderedDict] = None
-        self._input_column_names = None
-        self._fitted = False
-        self._new_training_data = False
-
-    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
-        self._inputs = inputs
-        self._outputs = outputs
-        self._fitted = False
-        self._new_training_data = True
-
-    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
-        if self._inputs is None or self._outputs is None:
-            raise ValueError("Missing training data.")
-
-        if not self._new_training_data:
-            return CallResult(None)
-        self._new_training_data = False
-
-        self._training_inputs, self._training_indices =
self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - max_features_=None, - n_features_=None, - n_outputs_=None, - tree_=None, - classes_=None, - n_classes_=None, - class_weight=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - max_features_=getattr(self._clf, 'max_features_', None), - n_features_=getattr(self._clf, 'n_features_', None), - n_outputs_=getattr(self._clf, 'n_outputs_', None), - tree_=getattr(self._clf, 'tree_', None), - classes_=getattr(self._clf, 'classes_', None), - n_classes_=getattr(self._clf, 'n_classes_', None), - class_weight=getattr(self._clf, 'class_weight', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.max_features_ = params['max_features_'] - self._clf.n_features_ = params['n_features_'] - self._clf.n_outputs_ = params['n_outputs_'] - self._clf.tree_ = params['tree_'] - self._clf.classes_ = params['classes_'] - self._clf.n_classes_ = params['n_classes_'] - self._clf.class_weight = params['class_weight'] - 
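Editor's note: get_params()/set_params() here serialize the learner by copying sklearn's fitted attributes (the trailing-underscore names) rather than pickling the estimator. A minimal sketch of that pattern with plain sklearn and toy data; the attribute names mirror this wrapper's Params, and which of them predict() actually needs varies by sklearn version, so treat this as illustrative for the ~0.21 series this code was generated against:

from sklearn.tree import DecisionTreeRegressor
import numpy

X = numpy.array([[0.0], [1.0], [2.0], [3.0]])
y = numpy.array([0.0, 1.0, 4.0, 9.0])

fitted = DecisionTreeRegressor(random_state=0).fit(X, y)

# getattr(..., None) makes capture safe on an unfitted estimator: every entry is None.
names = ('max_features_', 'n_features_', 'n_outputs_', 'tree_')
state = {name: getattr(fitted, name, None) for name in names}

# Restoring writes the attributes straight back, bypassing fit(); under sklearn ~0.21
# restored.predict(X) then works, because predict only checks for these attributes.
restored = DecisionTreeRegressor(random_state=0)
for name, value in state.items():
    if value is not None:
        setattr(restored, name, value)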
self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['max_features_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['n_outputs_'] is not None: - self._fitted = True - if params['tree_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['n_classes_'] is not None: - self._fitted = True - if params['class_weight'] is not None: - self._fitted = True - - - - - - def produce_feature_importances(self, *, timeout: float = None, iterations: int = None) -> CallResult[d3m_dataframe]: - output = d3m_dataframe(self._clf.feature_importances_.reshape((1, len(self._input_column_names)))) - output.columns = self._input_column_names - for i in range(len(self._input_column_names)): - output.metadata = output.metadata.update_column(i, {"name": self._input_column_names[i]}) - return CallResult(output) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return 
False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKDecisionTreeRegressor.__doc__ = DecisionTreeRegressor.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKDummyClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKDummyClassifier.py deleted file mode 100644 index 4425428..0000000 --- 
a/common-primitives/sklearn-wrap/sklearn_wrap/SKDummyClassifier.py +++ /dev/null @@ -1,503 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.dummy import DummyClassifier - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - classes_: Optional[ndarray] - n_classes_: Optional[Union[int,ndarray]] - class_prior_: Optional[ndarray] - n_outputs_: Optional[int] - sparse_output_: Optional[bool] - output_2d_: Optional[bool] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - strategy = hyperparams.Choice( - choices={ - 'stratified': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'most_frequent': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'prior': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'uniform': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'constant': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'constant': hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Hyperparameter[str]( - default='one', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'int': hyperparams.Bounded[int]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'ndarray': hyperparams.Hyperparameter[ndarray]( - default=numpy.array([]), - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='int', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ) - }, - default='stratified', - description='Strategy to use to generate predictions. * "stratified": generates predictions by respecting the training set\'s class distribution. * "most_frequent": always predicts the most frequent label in the training set. * "prior": always predicts the class that maximizes the class prior (like "most_frequent") and ``predict_proba`` returns the class prior. * "uniform": generates predictions uniformly at random. * "constant": always predicts a constant label that is provided by the user. This is useful for metrics that evaluate a non-majority class .. 
versionadded:: 0.17 Dummy Classifier now supports prior fitting strategy using parameter *prior*.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking, set this to False.",
-    )
-
-    return_semantic_type = hyperparams.Enumeration[str](
-        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
-        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
-        description='Decides what semantic type to attach to generated output.',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
-    )
-
-class SKDummyClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams],
-                        ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]):
-    """
-    Primitive wrapping for sklearn DummyClassifier
-    `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyClassifier.html>`_
-
-    """
-
-    __author__ = "JPL MARVIN"
-    metadata = metadata_base.PrimitiveMetadata({
-        "algorithm_types": [metadata_base.PrimitiveAlgorithmType.RULE_BASED_MACHINE_LEARNING, ],
-        "name": "sklearn.dummy.DummyClassifier",
-        "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION,
-        "python_path": "d3m.primitives.classification.dummy.SKlearn",
-        "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyClassifier.html']},
-        "version": "2019.11.13",
-        "id": "a1056ddf-2e89-3d8d-8308-2146170ae54d",
-        "hyperparams_to_tune": ['strategy'],
-        'installation': [
-            {'type': metadata_base.PrimitiveInstallationType.PIP,
-             'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format(
-                 git_commit=utils.current_git_commit(os.path.dirname(__file__)),
-             ),
-             }]
-    })
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams,
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None) -> None:
-
-        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-
-        # Build the underlying sklearn estimator from the primitive's hyperparams.
-        self._clf = DummyClassifier(
-            strategy=self.hyperparams['strategy']['choice'],
-            constant=self.hyperparams['strategy'].get('constant', 'int'),
-            random_state=self.random_seed,
-        )
-
-        self._inputs = None
-        self._outputs = None
-        self._training_inputs = None
-        self._training_outputs = None
-        self._target_names = None
-        self._training_indices = None
-        self._target_column_indices = None
-        self._target_columns_metadata: List[OrderedDict] = None
-        self._input_column_names = None
-        self._fitted = False
-        self._new_training_data = False
-
-    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
-        self._inputs = inputs
-        self._outputs = outputs
-        self._fitted = False
-        self._new_training_data = True
-
-    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
-        if self._inputs is None or self._outputs is None:
-            raise ValueError("Missing training data.")
-
-        if not self._new_training_data:
-            return CallResult(None)
-        self._new_training_data = False
-
-        self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
-        self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams)
-        self._input_column_names = self._training_inputs.columns
-
-        if len(self._training_indices) > 0 and len(self._target_column_indices) > 0:
-            self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams)
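Editor's note: the reshaping step that follows deserves a word. DataFrame.values hands sklearn a 2-D (n_samples, 1) array for a single target column, while sklearn estimators expect a 1-D y for single-target problems and emit a DataConversionWarning otherwise; hence the ravel. A minimal, self-contained sketch with toy data, not part of the original wrapper:

import numpy

sk_training_output = numpy.array([[0], [1], [1]])   # what DataFrame.values yields for one target column
shape = sk_training_output.shape
if len(shape) == 2 and shape[1] == 1:
    sk_training_output = numpy.ravel(sk_training_output)   # -> shape (3,)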
sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - classes_=None, - n_classes_=None, - class_prior_=None, - n_outputs_=None, - sparse_output_=None, - output_2d_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - classes_=getattr(self._clf, 'classes_', None), - n_classes_=getattr(self._clf, 'n_classes_', None), - class_prior_=getattr(self._clf, 'class_prior_', None), - n_outputs_=getattr(self._clf, 'n_outputs_', None), - sparse_output_=getattr(self._clf, 'sparse_output_', None), - output_2d_=getattr(self._clf, 'output_2d_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.classes_ = params['classes_'] - self._clf.n_classes_ = params['n_classes_'] - self._clf.class_prior_ = params['class_prior_'] - self._clf.n_outputs_ = params['n_outputs_'] - self._clf.sparse_output_ = params['sparse_output_'] - self._clf.output_2d_ = params['output_2d_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['classes_'] is not None: - self._fitted = True - if params['n_classes_'] is not None: - self._fitted = True - if params['class_prior_'] is not None: - self._fitted = True - if params['n_outputs_'] is not None: - self._fitted = True - if 
params['sparse_output_'] is not None: - self._fitted = True - if params['output_2d_'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. - log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: 
d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() 
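# Editorial note (added comments; not in the original file): the surrounding loop in
# _add_target_columns_metadata builds prediction-column metadata from scratch when
# nothing can be copied from training: each output column is tagged with the
# PredictedTarget semantic type and named after the source column, falling back to
# "output_<index>" when the source column has no name.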
- semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKDummyClassifier.__doc__ = DummyClassifier.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKDummyRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKDummyRegressor.py deleted file mode 100644 index 020942d..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKDummyRegressor.py +++ /dev/null @@ -1,442 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.dummy import DummyRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - constant_: Optional[Union[float, ndarray]] - n_outputs_: Optional[int] - output_2d_: Optional[bool] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - strategy = hyperparams.Choice( - choices={ - 'mean': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'median': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'quantile': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'quantile': hyperparams.Uniform( - default=0.5, - lower=0, - upper=1.0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'constant': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'constant': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=1.0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'int': hyperparams.Bounded[int]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'ndarray': hyperparams.Hyperparameter[ndarray]( - default=numpy.array([]), - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='float', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ) - }, - default='mean', - description='Strategy to use to generate predictions. 
* "mean": always predicts the mean of the training set * "median": always predicts the median of the training set * "quantile": always predicts a specified quantile of the training set, provided with the quantile parameter. * "constant": always predicts a constant value that is provided by the user.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.",
-    )
-
-    return_semantic_type = hyperparams.Enumeration[str](
-        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
-        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
-        description='Decides what semantic type to attach to generated output',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
-    )
-
-class SKDummyRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
-    """
-    Primitive wrapping for sklearn DummyRegressor
-    `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html>`_
-
-    """
-
-    __author__ = "JPL MARVIN"
-    metadata = metadata_base.PrimitiveMetadata({
-         "algorithm_types": [metadata_base.PrimitiveAlgorithmType.RULE_BASED_MACHINE_LEARNING, ],
-         "name": "sklearn.dummy.DummyRegressor",
-         "primitive_family": metadata_base.PrimitiveFamily.REGRESSION,
-         "python_path": "d3m.primitives.regression.dummy.SKlearn",
-         "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html']},
-         "version": "2019.11.13",
-         "id": "05aa5b6a-3b27-34dc-9ba7-8511fb13f253",
-         "hyperparams_to_tune": ['strategy'],
-         'installation': [
-                        {'type': metadata_base.PrimitiveInstallationType.PIP,
-                           'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format(
-                               git_commit=utils.current_git_commit(os.path.dirname(__file__)),
-                           ),
-                        }]
-    })
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams,
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None) -> None:
-
-        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-
-        # False
-        self._clf = DummyRegressor(
-              strategy=self.hyperparams['strategy']['choice'],
-              quantile=self.hyperparams['strategy'].get('quantile', 0.5),
-              constant=self.hyperparams['strategy'].get('constant', 'float'),
-        )
-
-        self._inputs = None
-        self._outputs = None
-        self._training_inputs = None
-        self._training_outputs = None
-        self._target_names = None
-        self._training_indices = None
-        self._target_column_indices = None
-        self._target_columns_metadata: List[OrderedDict] = None
-        self._input_column_names = None
-        self._fitted = False
-        self._new_training_data = False
-
-    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
-        self._inputs = inputs
-        self._outputs = outputs
-        self._fitted = False
-        self._new_training_data = True
-
-    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
-        if self._inputs is None or self._outputs is None:
-            raise ValueError("Missing training data.")
-
-        if not self._new_training_data:
-            return CallResult(None)
-        self._new_training_data = False
-
-        self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
-        self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams)
-        self._input_column_names = self._training_inputs.columns
-
-        if len(self._training_indices) > 0 and len(self._target_column_indices) > 0:
-            self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams)
-            sk_training_output = self._training_outputs.values
-
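# --- Editorial sketch (not from the original file). ---
# Once the 'quantile' choice of the strategy hyperparameter is unpacked into plain
# keyword arguments by the constructor above, the wrapped estimator behaves as
# below; the data is invented for illustration and uses scikit-learn directly:
from sklearn.dummy import DummyRegressor
import numpy

X = numpy.arange(6).reshape(-1, 1)
y = numpy.array([1.0, 2.0, 2.5, 3.0, 10.0, 20.0])
est = DummyRegressor(strategy='quantile', quantile=0.5)  # mirrors {'choice': 'quantile', 'quantile': 0.5}
est.fit(X, y)
print(est.predict(X[:2]))  # both predictions equal the training median, 2.75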
shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - constant_=None, - n_outputs_=None, - output_2d_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - constant_=getattr(self._clf, 'constant_', None), - n_outputs_=getattr(self._clf, 'n_outputs_', None), - output_2d_=getattr(self._clf, 'output_2d_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.constant_ = params['constant_'] - self._clf.n_outputs_ = params['n_outputs_'] - self._clf.output_2d_ = params['output_2d_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['constant_'] is not None: - self._fitted = True - if params['n_outputs_'] is not None: - self._fitted = True - if params['output_2d_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - 
exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
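# Editorial sketch (not from the original file) of the set arithmetic performed by
# the deleted lines below: training-only tags are stripped and the configured
# predicted-target tag is added. The variable names here are hypothetical:
base = 'https://metadata.datadrivendiscovery.org/types/'
semantic_types = {base + 'TrueTarget', base + 'Target'}
semantic_types -= {base + 'TrueTarget', base + 'SuggestedTarget'}   # remove training-only tags
semantic_types |= {base + 'PredictedTarget'}                        # add the output tag
assert semantic_types == {base + 'Target', base + 'PredictedTarget'}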
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKDummyRegressor.__doc__ = DummyRegressor.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKElasticNet.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKElasticNet.py deleted file mode 100644 index 894fcad..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKElasticNet.py +++ /dev/null @@ -1,466 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.coordinate_descent import ElasticNet - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = 
d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[float] - n_iter_: Optional[int] - dual_gap_: Optional[float] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - alpha = hyperparams.Bounded[float]( - default=1.0, - lower=0, - upper=None, - description='Constant that multiplies the penalty terms. Defaults to 1.0. See the notes for the exact mathematical meaning of this parameter.``alpha = 0`` is equivalent to an ordinary least square, solved by the :class:`LinearRegression` object. For numerical reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised. Given this, you should use the :class:`LinearRegression` object.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - l1_ratio = hyperparams.Uniform( - default=0.5, - lower=0, - upper=1, - description='The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='Whether the intercept should be estimated or not. If ``False``, the data is assumed to be already centered.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - normalize = hyperparams.UniformBool( - default=False, - description='This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - precompute = hyperparams.UniformBool( - default=False, - description='Whether to use a precomputed Gram matrix to speed up calculations. The Gram matrix can also be passed as argument. 
For sparse input this option is always ``True`` to preserve sparsity.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - max_iter = hyperparams.Bounded[int]( - default=1000, - lower=0, - upper=None, - description='The maximum number of iterations', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - positive = hyperparams.UniformBool( - default=False, - description='When set to ``True``, forces the coefficients to be positive.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - selection = hyperparams.Enumeration[str]( - default='cyclic', - values=['cyclic', 'random'], - description='If set to \'random\', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to \'random\') often leads to significantly faster convergence especially when tol is higher than 1e-4.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to ``True``, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
This hyperparam is ignored if use_semantic_types is set to false.",
-    )
-    use_semantic_types = hyperparams.UniformBool(
-        default=False,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
-    )
-    add_index_columns = hyperparams.UniformBool(
-        default=False,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
-    )
-    error_on_no_input = hyperparams.UniformBool(
-        default=True,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
-    )
-
-    return_semantic_type = hyperparams.Enumeration[str](
-        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
-        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
-        description='Decides what semantic type to attach to generated output',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
-    )
-
-class SKElasticNet(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
-    """
-    Primitive wrapping for sklearn ElasticNet
-    `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html>`_
-
-    """
-
-    __author__ = "JPL MARVIN"
-    metadata = metadata_base.PrimitiveMetadata({
-         "algorithm_types": [metadata_base.PrimitiveAlgorithmType.ELASTIC_NET_REGULARIZATION, ],
-         "name": "sklearn.linear_model.coordinate_descent.ElasticNet",
-         "primitive_family": metadata_base.PrimitiveFamily.REGRESSION,
-         "python_path": "d3m.primitives.regression.elastic_net.SKlearn",
-         "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html']},
-         "version": "2019.11.13",
-         "id": "a85d4ffb-49ab-35b1-a70c-6df209312aae",
-         "hyperparams_to_tune": ['alpha', 'max_iter', 'l1_ratio'],
-         'installation': [
-                        {'type': metadata_base.PrimitiveInstallationType.PIP,
-                           'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format(
-                               git_commit=utils.current_git_commit(os.path.dirname(__file__)),
-                           ),
-                        }]
-    })
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams,
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None) -> None:
-
-        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-
-        # False
-        self._clf = ElasticNet(
-              alpha=self.hyperparams['alpha'],
-              l1_ratio=self.hyperparams['l1_ratio'],
-              fit_intercept=self.hyperparams['fit_intercept'],
-              normalize=self.hyperparams['normalize'],
-              precompute=self.hyperparams['precompute'],
-              max_iter=self.hyperparams['max_iter'],
-              tol=self.hyperparams['tol'],
-              positive=self.hyperparams['positive'],
-              selection=self.hyperparams['selection'],
-              warm_start=self.hyperparams['warm_start'],
-              random_state=self.random_seed,
-        )
-
-        self._inputs = None
-        self._outputs = None
-        self._training_inputs = None
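# --- Editorial sketch (not from the original file). ---
# The constructor above forwards each hyperparameter verbatim to sklearn's
# ElasticNet. Equivalent direct use with invented data and the defaults listed in
# the Hyperparams class (modern import path shown; the file's
# sklearn.linear_model.coordinate_descent path is a deprecated internal module):
from sklearn.linear_model import ElasticNet
import numpy

rng = numpy.random.RandomState(0)
X = rng.rand(20, 3)
y = X @ numpy.array([1.0, 0.5, 0.0]) + 0.01 * rng.randn(20)
est = ElasticNet(alpha=1.0, l1_ratio=0.5, fit_intercept=True, max_iter=1000, tol=0.0001, selection='cyclic', random_state=0)
est.fit(X, y)
print(est.coef_)  # heavily regularized at alpha=1.0, so coefficients shrink toward zero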
- self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - n_iter_=None, - dual_gap_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - dual_gap_=getattr(self._clf, 'dual_gap_', None), - input_column_names=self._input_column_names, - 
training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.n_iter_ = params['n_iter_'] - self._clf.dual_gap_ = params['dual_gap_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - if params['dual_gap_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 
'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKElasticNet.__doc__ = ElasticNet.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKExtraTreesClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKExtraTreesClassifier.py deleted file mode 100644 index 51d77c9..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKExtraTreesClassifier.py +++ /dev/null @@ -1,675 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from 
collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.ensemble.forest import ExtraTreesClassifier - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - estimators_: Optional[Sequence[sklearn.base.BaseEstimator]] - classes_: Optional[Union[ndarray, List[ndarray]]] - n_classes_: Optional[Union[int, List[int]]] - n_features_: Optional[int] - n_outputs_: Optional[int] - oob_score_: Optional[float] - oob_decision_function_: Optional[ndarray] - base_estimator_: Optional[object] - estimator_params: Optional[tuple] - base_estimator: Optional[object] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_estimators = hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='The number of trees in the forest.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - criterion = hyperparams.Enumeration[str]( - values=['gini', 'entropy'], - default='gini', - description='The function to measure the quality of a split. Supported criteria are "gini" for the Gini impurity and "entropy" for the information gain.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_depth = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=10, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_split = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=2, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. 
versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_leaf = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=0.5, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_weight_fraction_leaf = hyperparams.Bounded[float]( - default=0, - lower=0, - upper=0.5, - description='The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_leaf_nodes = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=10, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'specified_int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'calculated': hyperparams.Enumeration[str]( - values=['auto', 'sqrt', 'log2'], - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='calculated', - description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. 
Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_impurity_decrease = hyperparams.Bounded[float]( - default=0.0, - lower=0.0, - upper=None, - description='A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19 ', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - bootstrap = hyperparams.Enumeration[str]( - values=['bootstrap', 'bootstrap_with_oob_score', 'disabled'], - default='bootstrap', - description='Whether bootstrap samples are used when building trees.' - ' And whether to use out-of-bag samples to estimate the generalization accuracy.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of jobs to run in parallel for both `fit` and `predict`. If -1, then the number of jobs is set to the number of cores.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - class_weight = hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Enumeration[str]( - default='balanced', - values=['balanced', 'balanced_subsample'], - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. For multi-output problems, a list of dicts can be provided in the same order as the columns of y. The "balanced" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` The "balanced_subsample" mode is the same as "balanced" except that weights are computed based on the bootstrap sample for every tree grown. For multi-output, the weights of each column of y will be multiplied. 
Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKExtraTreesClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn ExtraTreesClassifier - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.DECISION_TREE, ], - "name": "sklearn.ensemble.forest.ExtraTreesClassifier", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.extra_trees.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html']}, - "version": "2019.11.13", - "id": "c8a28f02-ef4a-35a8-87f1-cf79980f5c3e", - "hyperparams_to_tune": ['n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'max_features'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = ExtraTreesClassifier( - n_estimators=self.hyperparams['n_estimators'], - criterion=self.hyperparams['criterion'], - max_depth=self.hyperparams['max_depth'], - min_samples_split=self.hyperparams['min_samples_split'], - min_samples_leaf=self.hyperparams['min_samples_leaf'], - min_weight_fraction_leaf=self.hyperparams['min_weight_fraction_leaf'], - max_leaf_nodes=self.hyperparams['max_leaf_nodes'], - max_features=self.hyperparams['max_features'], - min_impurity_decrease=self.hyperparams['min_impurity_decrease'], - bootstrap=self.hyperparams['bootstrap'] in ['bootstrap', 'bootstrap_with_oob_score'], - oob_score=self.hyperparams['bootstrap'] in ['bootstrap_with_oob_score'], - n_jobs=self.hyperparams['n_jobs'], - warm_start=self.hyperparams['warm_start'], - class_weight=self.hyperparams['class_weight'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted =
False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - estimators_=None, - classes_=None, - n_classes_=None, - n_features_=None, - n_outputs_=None, - oob_score_=None, - oob_decision_function_=None, - base_estimator_=None, - estimator_params=None, - base_estimator=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - estimators_=getattr(self._clf, 'estimators_', None), - classes_=getattr(self._clf, 'classes_', None), - n_classes_=getattr(self._clf, 'n_classes_', None), - n_features_=getattr(self._clf, 'n_features_', None), - n_outputs_=getattr(self._clf, 'n_outputs_', None), - oob_score_=getattr(self._clf, 'oob_score_', None), - oob_decision_function_=getattr(self._clf, 'oob_decision_function_', None), - base_estimator_=getattr(self._clf, 'base_estimator_', None), - estimator_params=getattr(self._clf, 'estimator_params', None), - 
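# Note: each learned attribute is read defensively with getattr(..., None),
# so an estimator missing any of these attributes still serializes into
# Params without raising.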
base_estimator=getattr(self._clf, 'base_estimator', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.estimators_ = params['estimators_'] - self._clf.classes_ = params['classes_'] - self._clf.n_classes_ = params['n_classes_'] - self._clf.n_features_ = params['n_features_'] - self._clf.n_outputs_ = params['n_outputs_'] - self._clf.oob_score_ = params['oob_score_'] - self._clf.oob_decision_function_ = params['oob_decision_function_'] - self._clf.base_estimator_ = params['base_estimator_'] - self._clf.estimator_params = params['estimator_params'] - self._clf.base_estimator = params['base_estimator'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['estimators_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['n_classes_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['n_outputs_'] is not None: - self._fitted = True - if params['oob_score_'] is not None: - self._fitted = True - if params['oob_decision_function_'] is not None: - self._fitted = True - if params['base_estimator_'] is not None: - self._fitted = True - if params['estimator_params'] is not None: - self._fitted = True - if params['base_estimator'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. 
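# For example, if classes_[k] were ['cat', 'dog'], classes_map would map
# 'cat' -> 0 and 'dog' -> 1, so a row whose true label is 'dog' selects
# log_proba[k][row, 1], the model's log-probability of the true class.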
- log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - def produce_feature_importances(self, *, timeout: float = None, iterations: int = None) -> CallResult[d3m_dataframe]: - output = d3m_dataframe(self._clf.feature_importances_.reshape((1, len(self._input_column_names)))) - output.columns = self._input_column_names - for i in range(len(self._input_column_names)): - output.metadata = output.metadata.update_column(i, {"name": self._input_column_names[i]}) - return CallResult(output) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 
'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKExtraTreesClassifier.__doc__ = ExtraTreesClassifier.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKExtraTreesRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKExtraTreesRegressor.py deleted file mode 100644 index 4e4b10c..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKExtraTreesRegressor.py +++ /dev/null @@ -1,607 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import 
ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.ensemble.forest import ExtraTreesRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - estimators_: Optional[List[sklearn.tree.ExtraTreeRegressor]] - n_features_: Optional[int] - n_outputs_: Optional[int] - oob_score_: Optional[float] - oob_prediction_: Optional[ndarray] - base_estimator_: Optional[object] - estimator_params: Optional[tuple] - class_weight: Optional[Union[str, dict, List[dict]]] - base_estimator: Optional[object] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_estimators = hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='The number of trees in the forest.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - criterion = hyperparams.Enumeration[str]( - values=['mse', 'mae'], - default='mse', - description='The function to measure the quality of a split. Supported criteria are "mse" for the mean squared error, which is equal to variance reduction as feature selection criterion, and "mae" for the mean absolute error. .. versionadded:: 0.18 Mean Absolute Error (MAE) criterion.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_depth = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=5, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='The maximum depth of the tree. 
If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_split = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=1, - default=1.0, - description='It\'s a percentage and `ceil(min_samples_split * n_samples)` is the minimum number of samples for each split.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=2, - description='Minimum number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='int', - description='The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_leaf = hyperparams.Union( - configuration=OrderedDict({ - 'percent': hyperparams.Bounded[float]( - lower=0, - upper=0.5, - default=0.25, - description='It\'s a percentage and `ceil(min_samples_leaf * n_samples)` is the minimum number of samples for each node.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'absolute': hyperparams.Bounded[int]( - lower=1, - upper=None, - default=1, - description='Minimum number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_weight_fraction_leaf = hyperparams.Bounded[float]( - default=0, - lower=0, - upper=0.5, - description='The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_leaf_nodes = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=10, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. 
If None then unlimited number of leaf nodes.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'specified_int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'calculated': hyperparams.Enumeration[str]( - values=['auto', 'sqrt', 'log2'], - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='calculated', - description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=n_features`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_impurity_decrease = hyperparams.Bounded[float]( - default=0.0, - lower=0.0, - upper=None, - description='A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19 ', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - bootstrap = hyperparams.Enumeration[str]( - values=['bootstrap', 'bootstrap_with_oob_score', 'disabled'], - default='bootstrap', - description='Whether bootstrap samples are used when building trees.' 
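# The three enum values collapse into two sklearn constructor arguments
# (see __init__ below):
#   'bootstrap'                -> bootstrap=True,  oob_score=False
#   'bootstrap_with_oob_score' -> bootstrap=True,  oob_score=True
#   'disabled'                 -> bootstrap=False, oob_score=False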
- ' And whether to use out-of-bag samples to estimate the generalization accuracy.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of jobs to run in parallel for both `fit` and `predict`. If -1, then the number of jobs is set to the number of cores.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. 
Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKExtraTreesRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn ExtraTreesRegressor - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesRegressor.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.DECISION_TREE, ], - "name": "sklearn.ensemble.forest.ExtraTreesRegressor", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.extra_trees.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesRegressor.html']}, - "version": "2019.11.13", - "id": "35321059-2a1a-31fd-9509-5494efc751c7", - "hyperparams_to_tune": ['n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'max_features'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = ExtraTreesRegressor( - n_estimators=self.hyperparams['n_estimators'], - criterion=self.hyperparams['criterion'], - max_depth=self.hyperparams['max_depth'], - min_samples_split=self.hyperparams['min_samples_split'], - min_samples_leaf=self.hyperparams['min_samples_leaf'], - min_weight_fraction_leaf=self.hyperparams['min_weight_fraction_leaf'], - max_leaf_nodes=self.hyperparams['max_leaf_nodes'], - max_features=self.hyperparams['max_features'], - min_impurity_decrease=self.hyperparams['min_impurity_decrease'], - bootstrap=self.hyperparams['bootstrap'] in ['bootstrap', 'bootstrap_with_oob_score'], - oob_score=self.hyperparams['bootstrap'] in ['bootstrap_with_oob_score'], - warm_start=self.hyperparams['warm_start'], - n_jobs=self.hyperparams['n_jobs'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted =
False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - estimators_=None, - n_features_=None, - n_outputs_=None, - oob_score_=None, - oob_prediction_=None, - base_estimator_=None, - estimator_params=None, - class_weight=None, - base_estimator=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - estimators_=getattr(self._clf, 'estimators_', None), - n_features_=getattr(self._clf, 'n_features_', None), - n_outputs_=getattr(self._clf, 'n_outputs_', None), - oob_score_=getattr(self._clf, 'oob_score_', None), - oob_prediction_=getattr(self._clf, 'oob_prediction_', None), - base_estimator_=getattr(self._clf, 'base_estimator_', None), - estimator_params=getattr(self._clf, 
'estimator_params', None), - class_weight=getattr(self._clf, 'class_weight', None), - base_estimator=getattr(self._clf, 'base_estimator', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.estimators_ = params['estimators_'] - self._clf.n_features_ = params['n_features_'] - self._clf.n_outputs_ = params['n_outputs_'] - self._clf.oob_score_ = params['oob_score_'] - self._clf.oob_prediction_ = params['oob_prediction_'] - self._clf.base_estimator_ = params['base_estimator_'] - self._clf.estimator_params = params['estimator_params'] - self._clf.class_weight = params['class_weight'] - self._clf.base_estimator = params['base_estimator'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['estimators_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['n_outputs_'] is not None: - self._fitted = True - if params['oob_score_'] is not None: - self._fitted = True - if params['oob_prediction_'] is not None: - self._fitted = True - if params['base_estimator_'] is not None: - self._fitted = True - if params['estimator_params'] is not None: - self._fitted = True - if params['class_weight'] is not None: - self._fitted = True - if params['base_estimator'] is not None: - self._fitted = True - - - - - - def produce_feature_importances(self, *, timeout: float = None, iterations: int = None) -> CallResult[d3m_dataframe]: - output = d3m_dataframe(self._clf.feature_importances_.reshape((1, len(self._input_column_names)))) - output.columns = self._input_column_names - for i in range(len(self._input_column_names)): - output.metadata = output.metadata.update_column(i, {"name": self._input_column_names[i]}) - return CallResult(output) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - 
cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
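# For example, a target column tagged as TrueTarget and Target comes out
# tagged as Target plus PredictedTarget (and the configured
# return_semantic_type): TrueTarget/SuggestedTarget are removed, the
# predicted-target types are added, and the set union deduplicates.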
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKExtraTreesRegressor.__doc__ = ExtraTreesRegressor.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKFastICA.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKFastICA.py deleted file mode 100644 index f160a02..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKFastICA.py +++ /dev/null @@ -1,439 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.decomposition.fastica_ import FastICA - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - n_iter_: Optional[int] - mixing_: Optional[ndarray] - components_: 
Optional[ndarray] - mean_: Optional[ndarray] - whitening_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_components = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - description='All components are used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Number of components to extract. If None no dimension reduction is performed.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - algorithm = hyperparams.Enumeration[str]( - default='parallel', - values=['parallel', 'deflation'], - description='Apply a parallel or deflational FASTICA algorithm.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - whiten = hyperparams.UniformBool( - default=True, - description='If True perform an initial whitening of the data. If False, the data is assumed to have already been preprocessed: it should be centered, normed and white. Otherwise you will get incorrect results. In this case the parameter n_components will be ignored.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fun = hyperparams.Choice( - choices={ - 'logcosh': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'alpha': hyperparams.Hyperparameter[float]( - default=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'exp': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'cube': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ) - }, - default='logcosh', - description='The functional form of the G function used in the approximation to neg-entropy. Could be either \'logcosh\', \'exp\', or \'cube\'. You can also provide your own function. It should return a tuple containing the value of the function, and of its derivative, in the point. Example: def my_g(x): return x ** 3, 3 * x ** 2', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Bounded[int]( - default=200, - lower=0, - upper=None, - description='Maximum number of iterations to perform. tol: float, optional A positive scalar giving the tolerance at which the un-mixing matrix is considered to have converged.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - w_init = hyperparams.Union( - configuration=OrderedDict({ - 'ndarray': hyperparams.Hyperparameter[ndarray]( - default=numpy.array([]), - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Initial un-mixing array of dimension (n.comp,n.comp). 
If None (default) then an array of normal r.v.\'s is used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKFastICA(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn FastICA - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FastICA.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.PRINCIPAL_COMPONENT_ANALYSIS, ], - "name": "sklearn.decomposition.fastica_.FastICA", - "primitive_family": metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, - "python_path": "d3m.primitives.data_transformation.fast_ica.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FastICA.html']}, - "version": "2019.11.13", - "id": "03633ffa-425e-37d4-9f1c-bbb552f1e995", - "hyperparams_to_tune": ['n_components', 'algorithm'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = FastICA( - n_components=self.hyperparams['n_components'], - algorithm=self.hyperparams['algorithm'], - whiten=self.hyperparams['whiten'], - fun=self.hyperparams['fun']['choice'], - fun_args=self.hyperparams['fun'], - max_iter=self.hyperparams['max_iter'], - tol=self.hyperparams['tol'], - w_init=self.hyperparams['w_init'], - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise
PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - n_iter_=None, - mixing_=None, - components_=None, - mean_=None, - whitening_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - n_iter_=getattr(self._clf, 'n_iter_', None), - mixing_=getattr(self._clf, 'mixing_', None), - components_=getattr(self._clf, 'components_', None), - mean_=getattr(self._clf, 'mean_', None), - whitening_=getattr(self._clf, 'whitening_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.n_iter_ = params['n_iter_'] - self._clf.mixing_ = params['mixing_'] - self._clf.components_ = params['components_'] - self._clf.mean_ = params['mean_'] - self._clf.whitening_ = params['whitening_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['n_iter_'] is not None: - self._fitted = True - if params['mixing_'] is not None: - self._fitted = True - if params['components_'] is not None: - self._fitted = True - if params['mean_'] is not None: - self._fitted = True - if params['whitening_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - 
column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = 
list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKFastICA.__doc__ = FastICA.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKFeatureAgglomeration.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKFeatureAgglomeration.py deleted file mode 100644 index 36c1411..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKFeatureAgglomeration.py +++ /dev/null @@ -1,361 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.cluster.hierarchical import FeatureAgglomeration -from numpy import mean as npmean - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - labels_: Optional[ndarray] - n_leaves_: Optional[int] - children_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_clusters = hyperparams.Bounded[int]( - default=2, - lower=0, - upper=None, - description='The number of clusters to find.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - affinity = hyperparams.Enumeration[str]( - default='euclidean', - values=['euclidean', 'l1', 'l2', 'manhattan', 'cosine', 'precomputed'], - description='Metric used to compute the linkage. Can be "euclidean", "l1", "l2", "manhattan", "cosine", or \'precomputed\'. If linkage is "ward", only "euclidean" is accepted.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - compute_full_tree = hyperparams.Union( - configuration=OrderedDict({ - 'auto': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'bool': hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - description='Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of features. 
This option is useful only when specifying a connectivity matrix. Note also that when varying the number of clusters and using caching, it may be advantageous to compute the full tree.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - linkage = hyperparams.Enumeration[str]( - default='ward', - values=['ward', 'complete', 'average', 'single'], - description='Which linkage criterion to use. The linkage criterion determines which distance to use between sets of features. The algorithm will merge the pairs of cluster that minimize this criterion. - ward minimizes the variance of the clusters being merged. - average uses the average of the distances of each feature of the two sets. - complete or maximum linkage uses the maximum distances between all features of the two sets.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKFeatureAgglomeration(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn FeatureAgglomeration - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.DATA_STREAM_CLUSTERING, ], - "name": "sklearn.cluster.hierarchical.FeatureAgglomeration", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.feature_agglomeration.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html']}, - "version": "2019.11.13", - "id": "f259b009-5e0f-37b1-b117-441aba2b65c8", - "hyperparams_to_tune": ['n_clusters', 'affinity', 'linkage'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = FeatureAgglomeration( - n_clusters=self.hyperparams['n_clusters'], - affinity=self.hyperparams['affinity'], - compute_full_tree=self.hyperparams['compute_full_tree'], - linkage=self.hyperparams['linkage'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if 
self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - labels_=None, - n_leaves_=None, - children_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - labels_=getattr(self._clf, 'labels_', None), - n_leaves_=getattr(self._clf, 'n_leaves_', None), - children_=getattr(self._clf, 'children_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.labels_ = params['labels_'] - self._clf.n_leaves_ = params['n_leaves_'] - self._clf.children_ = params['children_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['labels_'] is not None: - self._fitted = True - if params['n_leaves_'] is not None: - self._fitted = True - if params['children_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = 
set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams): - - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_name = "output_{}".format(column_index) - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKFeatureAgglomeration.__doc__ = FeatureAgglomeration.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianNB.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianNB.py deleted file mode 100644 index d132e05..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianNB.py +++ /dev/null @@ -1,492 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.naive_bayes import GaussianNB - - -from d3m.container.numpy import ndarray as d3m_ndarray 
-from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - class_prior_: Optional[ndarray] - class_count_: Optional[ndarray] - theta_: Optional[ndarray] - sigma_: Optional[ndarray] - classes_: Optional[ndarray] - epsilon_: Optional[float] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - var_smoothing = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=1e-09, - description='Portion of the largest variance of all features that is added to variances for calculation stability.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. 
Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKGaussianNB(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams], - ContinueFitMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn GaussianNB - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.NAIVE_BAYES_CLASSIFIER, ], - "name": "sklearn.naive_bayes.GaussianNB", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.gaussian_naive_bayes.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html']}, - "version": "2019.11.13", - "id": "464783a8-771e-340d-999b-ae90b9f84f0b", - "hyperparams_to_tune": ['var_smoothing'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _priors: Union[ndarray, None] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = GaussianNB( - var_smoothing=self.hyperparams['var_smoothing'], - priors=_priors - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if 
self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def continue_fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._training_inputs is None or self._training_outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.partial_fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - class_prior_=None, - class_count_=None, - theta_=None, - sigma_=None, - classes_=None, - epsilon_=None, - input_column_names=self._input_column_names, - 
training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - class_prior_=getattr(self._clf, 'class_prior_', None), - class_count_=getattr(self._clf, 'class_count_', None), - theta_=getattr(self._clf, 'theta_', None), - sigma_=getattr(self._clf, 'sigma_', None), - classes_=getattr(self._clf, 'classes_', None), - epsilon_=getattr(self._clf, 'epsilon_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.class_prior_ = params['class_prior_'] - self._clf.class_count_ = params['class_count_'] - self._clf.theta_ = params['theta_'] - self._clf.sigma_ = params['sigma_'] - self._clf.classes_ = params['classes_'] - self._clf.epsilon_ = params['epsilon_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['class_prior_'] is not None: - self._fitted = True - if params['class_count_'] is not None: - self._fitted = True - if params['theta_'] is not None: - self._fitted = True - if params['sigma_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['epsilon_'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. 
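# log_proba[k] has shape (n_samples, n_classes), so pairing the row indices numpy.arange(samples_length) with the per-sample class indices in mapped_outputs_column selects one log-probability per row via integer fancy indexing, e.g. (hypothetical values) rows [0, 1] with mapped classes [2, 0] pick log_proba[k][0, 2] and log_proba[k][1, 0].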
- log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, 
hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKGaussianNB.__doc__ = GaussianNB.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianProcessRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianProcessRegressor.py deleted file mode 100644 index ff8417e..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianProcessRegressor.py +++ /dev/null @@ -1,463 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.gaussian_process.gpr import GaussianProcessRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from 
d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - X_train_: Optional[ndarray] - y_train_: Optional[ndarray] - kernel_: Optional[Callable] - alpha_: Optional[ndarray] - log_marginal_likelihood_value_: Optional[float] - _y_train_mean: Optional[ndarray] - _rng: Optional[numpy.random.mtrand.RandomState] - L_: Optional[ndarray] - _K_inv: Optional[object] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - alpha = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Hyperparameter[float]( - default=1e-10, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'ndarray': hyperparams.Hyperparameter[ndarray]( - default=numpy.array([]), - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='float', - description='Value added to the diagonal of the kernel matrix during fitting. Larger values correspond to increased noise level in the observations and reduce potential numerical issue during fitting. If an array is passed, it must have the same number of entries as the data used for fitting and is used as datapoint-dependent noise level. Note that this is equivalent to adding a WhiteKernel with c=alpha. Allowing to specify the noise level directly as a parameter is mainly for convenience and for consistency with Ridge.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - optimizer = hyperparams.Constant( - default='fmin_l_bfgs_b', - description='Can either be one of the internally supported optimizers for optimizing the kernel\'s parameters, specified by a string, or an externally defined optimizer passed as a callable. If a callable is passed, it must have the signature:: def optimizer(obj_func, initial_theta, bounds): # * \'obj_func\' is the objective function to be maximized, which # takes the hyperparameters theta as parameter and an # optional flag eval_gradient, which determines if the # gradient is returned additionally to the function value # * \'initial_theta\': the initial value for theta, which can be # used by local optimizers # * \'bounds\': the bounds on the values of theta .... # Returned are the best found hyperparameters theta and # the corresponding value of the target function. return theta_opt, func_min Per default, the \'fmin_l_bfgs_b\' algorithm from scipy.optimize is used. If None is passed, the kernel\'s parameters are kept fixed. Available internal optimizers are:: \'fmin_l_bfgs_b\'', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_restarts_optimizer = hyperparams.Bounded[int]( - default=0, - lower=0, - upper=None, - description='The number of restarts of the optimizer for finding the kernel\'s parameters which maximize the log-marginal likelihood. 
The first run of the optimizer is performed from the kernel\'s initial parameters, the remaining ones (if any) from thetas sampled log-uniform randomly from the space of allowed theta-values. If greater than 0, all bounds must be finite. Note that n_restarts_optimizer == 0 implies that one run is performed.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - normalize_y = hyperparams.UniformBool( - default=False, - description='Whether the target values y are normalized, i.e., the mean of the observed target values become zero. This parameter should be set to True if the target values\' mean is expected to differ considerable from zero. When enabled, the normalization effectively modifies the GP\'s prior based on the data, which contradicts the likelihood principle; normalization is thus disabled per default. copy_X_train : bool, optional (default: True) If True, a persistent copy of the training data is stored in the object. Otherwise, just a reference to the training data is stored, which might cause predictions to change if the data is modified externally.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. 
Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKGaussianProcessRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn GaussianProcessRegressor - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.GAUSSIAN_PROCESS, ], - "name": "sklearn.gaussian_process.gpr.GaussianProcessRegressor", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.gaussian_process.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html']}, - "version": "2019.11.13", - "id": "3894e630-d67b-35d9-ab78-233e264f6324", - "hyperparams_to_tune": ['alpha'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = GaussianProcessRegressor( - alpha=self.hyperparams['alpha'], - optimizer=self.hyperparams['optimizer'], - n_restarts_optimizer=self.hyperparams['n_restarts_optimizer'], - normalize_y=self.hyperparams['normalize_y'], - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - 
self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - X_train_=None, - y_train_=None, - kernel_=None, - alpha_=None, - log_marginal_likelihood_value_=None, - _y_train_mean=None, - _rng=None, - L_=None, - _K_inv=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - X_train_=getattr(self._clf, 'X_train_', None), - y_train_=getattr(self._clf, 'y_train_', None), - kernel_=getattr(self._clf, 'kernel_', None), - alpha_=getattr(self._clf, 'alpha_', None), - log_marginal_likelihood_value_=getattr(self._clf, 'log_marginal_likelihood_value_', None), - _y_train_mean=getattr(self._clf, '_y_train_mean', None), - _rng=getattr(self._clf, '_rng', None), - L_=getattr(self._clf, 'L_', None), - _K_inv=getattr(self._clf, '_K_inv', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.X_train_ = params['X_train_'] - self._clf.y_train_ = params['y_train_'] - self._clf.kernel_ = params['kernel_'] - self._clf.alpha_ = params['alpha_'] - self._clf.log_marginal_likelihood_value_ = params['log_marginal_likelihood_value_'] - 
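# sklearn's private attributes (_y_train_mean, _rng, _K_inv) are restored below as well, since GaussianProcessRegressor's predict/sample_y paths use them; any non-None fitted attribute then re-marks the primitive as fitted.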
self._clf._y_train_mean = params['_y_train_mean'] - self._clf._rng = params['_rng'] - self._clf.L_ = params['L_'] - self._clf._K_inv = params['_K_inv'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['X_train_'] is not None: - self._fitted = True - if params['y_train_'] is not None: - self._fitted = True - if params['kernel_'] is not None: - self._fitted = True - if params['alpha_'] is not None: - self._fitted = True - if params['log_marginal_likelihood_value_'] is not None: - self._fitted = True - if params['_y_train_mean'] is not None: - self._fitted = True - if params['_rng'] is not None: - self._fitted = True - if params['L_'] is not None: - self._fitted = True - if params['_K_inv'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - 
exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKGaussianProcessRegressor.__doc__ = GaussianProcessRegressor.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianRandomProjection.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianRandomProjection.py deleted file mode 100644 index 867d904..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKGaussianRandomProjection.py +++ /dev/null @@ -1,344 +0,0 @@ -from typing import Any, Callable, List, 
Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.random_projection import GaussianRandomProjection - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - n_component_: Optional[int] - components_: Optional[Union[ndarray, sparse.spmatrix]] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_components = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=100, - description='Number of components to keep.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - description='Dimensionality of the target projection space. n_components can be automatically adjusted according to the number of samples in the dataset and the bound given by the Johnson-Lindenstrauss lemma. In that case the quality of the embedding is controlled by the ``eps`` parameter. It should be noted that Johnson-Lindenstrauss lemma can yield very conservative estimated of the required number of components as it makes no assumption on the structure of the dataset.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - eps = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=1, - description='Parameter to control the quality of the embedding according to the Johnson-Lindenstrauss lemma when n_components is set to \'auto\'. Smaller values lead to better embedding and higher number of dimensions (n_components) in the target projection space.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. 
Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKGaussianRandomProjection(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn GaussianRandomProjection - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.RANDOM_PROJECTION, ], - "name": "sklearn.random_projection.GaussianRandomProjection", - "primitive_family": metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, - "python_path": "d3m.primitives.data_transformation.gaussian_random_projection.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.random_projection.GaussianRandomProjection.html']}, - "version": "2019.11.13", - "id": "fc933ab9-baaf-47ca-a373-bdd33081f5fa", - "hyperparams_to_tune": ['n_components'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = GaussianRandomProjection( - n_components=self.hyperparams['n_components'], - eps=self.hyperparams['eps'], - random_state=self.random_seed, - ) - - self._inputs = None - 
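# --- Editor's sketch (annotation, not part of the deleted file). With n_components='auto',
# sklearn sizes the projection from the Johnson-Lindenstrauss bound: smaller eps preserves
# pairwise distances more tightly but demands more components. johnson_lindenstrauss_min_dim
# is sklearn's own helper for this bound. (Note: the Params field `n_component_` declared
# above does not match sklearn's fitted attribute `n_components_`, so get_params() always
# records None for it; restoring `components_` is what actually matters for transform().)
from sklearn.random_projection import johnson_lindenstrauss_min_dim

for eps in (0.5, 0.1, 0.01):
    k = johnson_lindenstrauss_min_dim(n_samples=10_000, eps=eps)
    print(f"eps={eps}: need at least {k} components")  # eps=0.1 -> 7894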
self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - n_component_=None, - components_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - n_component_=getattr(self._clf, 'n_component_', None), - components_=getattr(self._clf, 'components_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.n_component_ = params['n_component_'] - self._clf.components_ = params['components_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['n_component_'] is not None: - self._fitted = True - if params['components_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: 
Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
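# --- Editor's sketch (annotation, not part of the deleted file). The column filter in
# _can_produce_column() accepts a column only when every accepted semantic type is present:
# `len(accepted - found) == 0` is the set-difference spelling of accepted.issubset(found).
accepted = {"https://metadata.datadrivendiscovery.org/types/Attribute"}
found = {"https://metadata.datadrivendiscovery.org/types/Attribute", "http://schema.org/Float"}

assert (len(accepted - found) == 0) == accepted.issubset(found)  # both True here
assert len(accepted - set()) != 0  # a column carrying no matching types is rejected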
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set([])
-            add_semantic_types = set()  # fixed: was a list, but .add() below requires a set
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            column_metadata.pop("structural_type", None)
-            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)
-
-        return outputs_metadata
-
-    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
-        outputs = d3m_dataframe(predictions, generate_metadata=True)
-        target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams)
-        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
-        return outputs
-
-
-    @classmethod
-    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams):
-
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_name = "output_{}".format(column_index)
-            column_metadata = OrderedDict()
-            semantic_types = set()
-            semantic_types.add(hyperparams["return_semantic_type"])
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            column_metadata["name"] = str(column_name)
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-
-SKGaussianRandomProjection.__doc__ = GaussianRandomProjection.__doc__
\ No newline at end of file
diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKGenericUnivariateSelect.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKGenericUnivariateSelect.py
deleted file mode 100644
index b0c45ad..0000000
--- a/common-primitives/sklearn-wrap/sklearn_wrap/SKGenericUnivariateSelect.py
+++ /dev/null
@@ -1,443 +0,0 @@
-from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
-from numpy import ndarray
-from collections import OrderedDict
-from scipy import sparse
-import os
-import sklearn
-import numpy
-import typing
-
-# Custom import commands if any
-from sklearn.feature_selection.univariate_selection import GenericUnivariateSelect
-from sklearn.feature_selection import f_classif, f_regression, chi2
-
-
-from d3m.container.numpy import ndarray as d3m_ndarray
-from d3m.container import DataFrame as d3m_dataframe
-from d3m.metadata import hyperparams, params, base as metadata_base
-from d3m import utils
-from d3m.base import utils as base_utils
-from d3m.exceptions import PrimitiveNotFittedError
-from d3m.primitive_interfaces.base import CallResult, DockerContainer
-
-from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase
-from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin
-from d3m import exceptions
-import pandas
-
-
-
-Inputs = d3m_dataframe
-Outputs = d3m_dataframe
-
-
-class Params(params.Params):
-    scores_:
Optional[ndarray] - pvalues_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - score_func = hyperparams.Enumeration[str]( - default='f_classif', - values=['f_classif', 'f_regression', 'chi2'], - description='Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). For modes \'percentile\' or \'kbest\' it can return a single array scores.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - mode = hyperparams.Enumeration[str]( - default='percentile', - values=['percentile', 'k_best', 'fpr', 'fdr', 'fwe'], - description='Feature selection mode.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - param = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Hyperparameter[float]( - default=1e-05, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'int': hyperparams.Hyperparameter[int]( - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='float', - description='Parameter of the corresponding mode.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['update_semantic_types', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", -) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. 
Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKGenericUnivariateSelect(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn GenericUnivariateSelect - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.STATISTICAL_MOMENT_ANALYSIS, ], - "name": "sklearn.feature_selection.univariate_selection.GenericUnivariateSelect", - "primitive_family": metadata_base.PrimitiveFamily.FEATURE_SELECTION, - "python_path": "d3m.primitives.feature_selection.generic_univariate_select.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.GenericUnivariateSelect.html']}, - "version": "2019.11.13", - "id": "1055a114-5c94-33b0-9100-675fd0200e72", - "hyperparams_to_tune": ['mode'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = GenericUnivariateSelect( - score_func=eval(self.hyperparams['score_func']), - mode=self.hyperparams['mode'], - param=self.hyperparams['param'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return 
CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None or self._training_outputs is None: - raise ValueError("Missing training data.") - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.transform(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - target_columns_metadata = self._copy_columns_metadata(inputs.iloc[:, self._training_indices].metadata, - self.produce_support().value) - output = self._wrap_predictions(inputs, sk_output, target_columns_metadata) - output.columns = [inputs.columns[idx] for idx in range(len(inputs.columns)) if idx in self.produce_support().value] - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - if self.hyperparams['return_result'] == 'update_semantic_types': - temp_inputs = inputs.copy() - columns_not_selected = sorted(set(range(len(temp_inputs.columns))) - set(self.produce_support().value)) - - for idx in columns_not_selected: - temp_inputs.metadata = temp_inputs.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, idx), - 'https://metadata.datadrivendiscovery.org/types/Attribute') - - temp_inputs = temp_inputs.select_columns(self._training_indices) - outputs = base_utils.combine_columns(return_result='replace', - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=[temp_inputs]) - return CallResult(outputs) - - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output) - - return CallResult(outputs) - - def produce_support(self, *, timeout: float = None, iterations: int = None) -> CallResult[Any]: - all_indices = self._training_indices - selected_indices = self._clf.get_support(indices=True).tolist() - indices = [all_indices[index] for index in selected_indices] - return CallResult(indices) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - scores_=None, - pvalues_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - 
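# --- Editor's sketch (annotation, not part of the deleted file). produce_support() maps
# sklearn's selector output back to the original dataframe's column indices:
# get_support(indices=True) returns positions *within the fitted training columns*, which
# are then translated through self._training_indices. Toy values below are assumptions.
# (Aside: the eval() on score_func in __init__ works because f_classif/f_regression/chi2
# are imported above, though a lookup dict would be the safer idiom.)
import numpy
from sklearn.feature_selection import GenericUnivariateSelect, f_classif

X = numpy.array([[1.0, 0.1, 3.0], [2.0, 1.0, 0.5], [3.0, 0.2, 0.1], [4.0, 0.9, 2.0]])
y = numpy.array([0, 1, 0, 1])

selector = GenericUnivariateSelect(score_func=f_classif, mode='k_best', param=2).fit(X, y)
training_indices = [2, 4, 7]                    # where X's columns live in the full frame
selected = selector.get_support(indices=True)   # positions into training_indices
print([training_indices[i] for i in selected])  # original column indices of kept features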
target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - scores_=getattr(self._clf, 'scores_', None), - pvalues_=getattr(self._clf, 'pvalues_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.scores_ = params['scores_'] - self._clf.pvalues_ = params['pvalues_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['scores_'] is not None: - self._fitted = True - if params['pvalues_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - 
hyperparams['exclude_outputs_columns'],
-                                                                                               can_use_column=can_produce_column)
-        targets = []
-        if target_column_indices:
-            targets = data.select_columns(target_column_indices)
-        target_column_names = []
-        for idx in target_column_indices:
-            target_column_names.append(data.columns[idx])
-        return targets, target_column_names, target_column_indices
-
-    @classmethod
-    def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]:
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict(outputs_metadata.query_column(column_index))
-
-            # Update semantic types and prepare it for predicted targets.
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set([])
-            add_semantic_types = set()  # fixed: was a list, but .add() below requires a set
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            if len(target_columns_metadata) == 1:
-                name = column_metadata.get("name")
-                for idx in range(len(outputs.columns)):
-                    outputs_metadata = outputs_metadata.update_column(idx, column_metadata)
-                    if len(outputs.columns) > 1:
-                        # Updating column names.
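# --- Editor's sketch (annotation, not part of the deleted file). When one metadata template
# must cover several output columns (the len(target_columns_metadata) == 1 branch above),
# each column receives the template's name plus a positional suffix. Pure-Python model of
# the naming rule applied just below:
name = "target"      # the template column's name
n_outputs = 3        # len(outputs.columns)
print(["{}_{}".format(name, idx) for idx in range(n_outputs)])  # ['target_0', 'target_1', 'target_2']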
- outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, idx), {'name': "{}_{}".format(name, idx)}) - else: - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray, target_columns_metadata) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - - @classmethod - def _copy_columns_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_indices) -> List[OrderedDict]: - outputs_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in column_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKGenericUnivariateSelect.__doc__ = GenericUnivariateSelect.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKGradientBoostingClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKGradientBoostingClassifier.py deleted file mode 100644 index 0c92268..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKGradientBoostingClassifier.py +++ /dev/null @@ -1,707 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.ensemble.gradient_boosting import GradientBoostingClassifier -import sys - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - oob_improvement_: Optional[ndarray] - train_score_: Optional[ndarray] - loss_: Optional[object] - init_: Optional[object] - estimators_: Optional[ndarray] - n_features_: Optional[int] - classes_: Optional[ndarray] - max_features_: Optional[int] - n_classes_: Optional[Union[int, List[int]]] - alpha: Optional[float] - _rng: Optional[object] - n_estimators_: Optional[int] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class 
Hyperparams(hyperparams.Hyperparams): - loss = hyperparams.Enumeration[str]( - default='deviance', - values=['deviance', 'exponential'], - description='loss function to be optimized. \'deviance\' refers to deviance (= logistic regression) for classification with probabilistic outputs. For loss \'exponential\' gradient boosting recovers the AdaBoost algorithm.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - learning_rate = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - description='learning rate shrinks the contribution of each tree by `learning_rate`. There is a trade-off between learning_rate and n_estimators.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_estimators = hyperparams.Bounded[int]( - default=100, - lower=1, - upper=None, - description='The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_depth = hyperparams.Bounded[int]( - default=3, - lower=0, - upper=None, - description='maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - criterion = hyperparams.Enumeration[str]( - default='friedman_mse', - values=['friedman_mse', 'mse', 'mae'], - description='The function to measure the quality of a split. Supported criteria are "friedman_mse" for the mean squared error with improvement score by Friedman, "mse" for mean squared error, and "mae" for the mean absolute error. The default value of "friedman_mse" is generally the best as it can provide a better approximation in some cases. .. versionadded:: 0.18', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_split = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=2, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_leaf = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=0.5, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. 
- If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_weight_fraction_leaf = hyperparams.Bounded[float]( - default=0, - lower=0, - upper=0.5, - description='The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - subsample = hyperparams.Bounded[float]( - default=1.0, - lower=0, - upper=None, - description='The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. `subsample` interacts with the parameter `n_estimators`. Choosing `subsample < 1.0` leads to a reduction of variance and an increase in bias.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'specified_int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'calculated': hyperparams.Enumeration[str]( - values=['auto', 'sqrt', 'log2'], - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Choosing `max_features < n_features` leads to a reduction of variance and an increase in bias. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_leaf_nodes = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=10, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. 
If None then unlimited number of leaf nodes.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_impurity_decrease = hyperparams.Bounded[float]( - default=0.0, - lower=0.0, - upper=None, - description='A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just erase the previous solution.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - presort = hyperparams.Union( - configuration=OrderedDict({ - 'bool': hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - description='Whether to presort the data to speed up the finding of best splits in fitting. Auto mode by default will use presorting on dense data and default to normal sorting on sparse data. Setting presort to true on sparse data will raise an error. .. versionadded:: 0.17 *presort* parameter.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - validation_fraction = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=1, - description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if ``n_iter_no_change`` is set to an integer.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_iter_no_change = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=5, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='``n_iter_no_change`` is used to decide if early stopping will be used to terminate training when validation score is not improving. By default it is set to None to disable early stopping. If set to a number, it will set aside ``validation_fraction`` size of the training data as validation and terminate training when validation score is not improving in all of the previous ``n_iter_no_change`` numbers of iterations.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='Tolerance for the early stopping. 
When the loss is not improving by at least tol for ``n_iter_no_change`` iterations (if set to a number), the training stops.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKGradientBoostingClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn GradientBoostingClassifier - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.GRADIENT_BOOSTING, ], - "name": "sklearn.ensemble.gradient_boosting.GradientBoostingClassifier", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.gradient_boosting.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html']}, - "version": "2019.11.13", - "id": "01d2c086-91bf-3ca5-b023-5139cf239c77", - "hyperparams_to_tune": ['n_estimators', 'learning_rate', 'max_depth', 'min_samples_leaf', 'min_samples_split', 'max_features'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = GradientBoostingClassifier( - loss=self.hyperparams['loss'], - learning_rate=self.hyperparams['learning_rate'], - n_estimators=self.hyperparams['n_estimators'], - max_depth=self.hyperparams['max_depth'], - criterion=self.hyperparams['criterion'], - min_samples_split=self.hyperparams['min_samples_split'], - min_samples_leaf=self.hyperparams['min_samples_leaf'], - min_weight_fraction_leaf=self.hyperparams['min_weight_fraction_leaf'], - subsample=self.hyperparams['subsample'], - max_features=self.hyperparams['max_features'], - max_leaf_nodes=self.hyperparams['max_leaf_nodes'], - min_impurity_decrease=self.hyperparams['min_impurity_decrease'], - warm_start=self.hyperparams['warm_start'], - presort=self.hyperparams['presort'], - validation_fraction=self.hyperparams['validation_fraction'], - n_iter_no_change=self.hyperparams['n_iter_no_change'], - tol=self.hyperparams['tol'], - verbose=_verbose, - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def 
set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - oob_improvement_=None, - train_score_=None, - loss_=None, - init_=None, - estimators_=None, - n_features_=None, - classes_=None, - max_features_=None, - n_classes_=None, - alpha=None, - _rng=None, - n_estimators_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - oob_improvement_=getattr(self._clf, 'oob_improvement_', None), - train_score_=getattr(self._clf, 'train_score_', None), - loss_=getattr(self._clf, 'loss_', None), - init_=getattr(self._clf, 'init_', None), - estimators_=getattr(self._clf, 'estimators_', None), - n_features_=getattr(self._clf, 'n_features_', None), - classes_=getattr(self._clf, 'classes_', None), - max_features_=getattr(self._clf, 
'max_features_', None), - n_classes_=getattr(self._clf, 'n_classes_', None), - alpha=getattr(self._clf, 'alpha', None), - _rng=getattr(self._clf, '_rng', None), - n_estimators_=getattr(self._clf, 'n_estimators_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.oob_improvement_ = params['oob_improvement_'] - self._clf.train_score_ = params['train_score_'] - self._clf.loss_ = params['loss_'] - self._clf.init_ = params['init_'] - self._clf.estimators_ = params['estimators_'] - self._clf.n_features_ = params['n_features_'] - self._clf.classes_ = params['classes_'] - self._clf.max_features_ = params['max_features_'] - self._clf.n_classes_ = params['n_classes_'] - self._clf.alpha = params['alpha'] - self._clf._rng = params['_rng'] - self._clf.n_estimators_ = params['n_estimators_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['oob_improvement_'] is not None: - self._fitted = True - if params['train_score_'] is not None: - self._fitted = True - if params['loss_'] is not None: - self._fitted = True - if params['init_'] is not None: - self._fitted = True - if params['estimators_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['max_features_'] is not None: - self._fitted = True - if params['n_classes_'] is not None: - self._fitted = True - if params['alpha'] is not None: - self._fitted = True - if params['_rng'] is not None: - self._fitted = True - if params['n_estimators_'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. 
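    # A standalone illustration of the indexing on the next line, with toy
    # values assumed for the sketch: each row picks the log-probability of
    # its own true class.
    import numpy, pandas
    classes_k = numpy.array(['cat', 'dog'])            # classes[k]
    log_proba_k = numpy.log([[0.9, 0.1], [0.2, 0.8]])  # rows are samples
    outputs_col = pandas.Series(['dog', 'cat'])        # labels may be strings
    classes_map = pandas.Series(numpy.arange(2), index=classes_k)
    mapped = outputs_col.map(classes_map)              # 'dog' -> 1, 'cat' -> 0
    picked = log_proba_k[numpy.arange(2), mapped]      # [log 0.1, log 0.2]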
- log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - def produce_feature_importances(self, *, timeout: float = None, iterations: int = None) -> CallResult[d3m_dataframe]: - output = d3m_dataframe(self._clf.feature_importances_.reshape((1, len(self._input_column_names)))) - output.columns = self._input_column_names - for i in range(len(self._input_column_names)): - output.metadata = output.metadata.update_column(i, {"name": self._input_column_names[i]}) - return CallResult(output) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 
'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKGradientBoostingClassifier.__doc__ = GradientBoostingClassifier.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKGradientBoostingRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKGradientBoostingRegressor.py deleted file mode 100644 index 7ec68f0..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKGradientBoostingRegressor.py +++ /dev/null @@ -1,673 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, 
Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.ensemble.gradient_boosting import GradientBoostingRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - oob_improvement_: Optional[ndarray] - train_score_: Optional[ndarray] - loss_: Optional[object] - estimators_: Optional[object] - n_features_: Optional[int] - init_: Optional[object] - max_features_: Optional[int] - n_classes_: Optional[Union[int, List[int]]] - _rng: Optional[object] - n_estimators_: Optional[int] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - loss = hyperparams.Choice( - choices={ - 'ls': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'lad': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'huber': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'alpha': hyperparams.Constant( - default=0.9, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'quantile': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'alpha': hyperparams.Constant( - default=0.9, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ) - }, - default='ls', - description='loss function to be optimized. \'ls\' refers to least squares regression. \'lad\' (least absolute deviation) is a highly robust loss function solely based on order information of the input variables. \'huber\' is a combination of the two. \'quantile\' allows quantile regression (use `alpha` to specify the quantile).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - learning_rate = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.1, - description='learning rate shrinks the contribution of each tree by `learning_rate`. There is a trade-off between learning_rate and n_estimators.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_estimators = hyperparams.Bounded[int]( - lower=1, - upper=None, - default=100, - description='The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_depth = hyperparams.Bounded[int]( - lower=0, - upper=None, - default=3, - description='maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. 
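The `loss` Choice above nests an `alpha` tuning parameter under its 'huber' and 'quantile' branches. A short sketch of how such a value is read back, mirroring the `__init__` further below (assuming only that `Hyperparams` is the class being defined here):

    hp = Hyperparams.defaults()
    loss_name = hp['loss']['choice']       # 'ls' by default
    alpha = hp['loss'].get('alpha', 0.9)   # present only for 'huber'/'quantile'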
Tune this parameter for best performance; the best value depends on the interaction of the input variables.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - criterion = hyperparams.Enumeration[str]( - values=['friedman_mse', 'mse', 'mae'], - default='friedman_mse', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - description='The function to measure the quality of a split. Supported criteria are "friedman_mse" for the mean squared error with improvement score by Friedman, "mse" for mean squared error, and "mae" for the mean absolute error. The default value of "friedman_mse" is generally the best as it can provide a better approximation in some cases. .. versionadded:: 0.18' - ) - min_samples_split = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=1, - default=1.0, - description='It\'s a percentage and `ceil(min_samples_split * n_samples)` is the minimum number of samples for each split.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=2, - description='Minimum number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='int', - description='The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_leaf = hyperparams.Union( - configuration=OrderedDict({ - 'percent': hyperparams.Bounded[float]( - lower=0, - upper=0.5, - default=0.25, - description='It\'s a percentage and `ceil(min_samples_leaf * n_samples)` is the minimum number of samples for each node.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'absolute': hyperparams.Bounded[int]( - lower=1, - upper=None, - default=1, - description='Minimum number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_weight_fraction_leaf = hyperparams.Bounded[float]( - default=0, - lower=0, - upper=0.5, - description='The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - subsample = hyperparams.Bounded[int]( - default=1, - lower=0, - upper=None, - description='The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. `subsample` interacts with the parameter `n_estimators`. 
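`min_samples_split` and `min_samples_leaf` above are `hyperparams.Union`s: the stored value belongs to whichever configured branch validates it. A minimal sketch under the same `Hyperparams` assumption:

    hp = Hyperparams.defaults()
    assert hp['min_samples_leaf'] == 1            # 'absolute' (int) branch default
    hp = hp.replace({'min_samples_leaf': 0.25})   # a float selects the 'percent' branch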
Choosing `subsample < 1.0` leads to a reduction of variance and an increase in bias.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'specified_int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'calculated': hyperparams.Enumeration[str]( - values=['auto', 'sqrt', 'log2'], - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Bounded[float]( - default=0.25, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=n_features`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. Choosing `max_features < n_features` leads to a reduction of variance and an increase in bias. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_leaf_nodes = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=10, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_impurity_decrease = hyperparams.Bounded[float]( - default=0.0, - lower=0.0, - upper=None, - description='A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. 
versionadded:: 0.19 ', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just erase the previous solution.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - presort = hyperparams.Union( - configuration=OrderedDict({ - 'bool': hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - description='Whether to presort the data to speed up the finding of best splits in fitting. Auto mode by default will use presorting on dense data and default to normal sorting on sparse data. Setting presort to true on sparse data will raise an error. .. versionadded:: 0.17 optional parameter *presort*.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - validation_fraction = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=1, - description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if ``n_iter_no_change`` is set to an integer.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_iter_no_change = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=5, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='``n_iter_no_change`` is used to decide if early stopping will be used to terminate training when validation score is not improving. By default it is set to None to disable early stopping. If set to a number, it will set aside ``validation_fraction`` size of the training data as validation and terminate training when validation score is not improving in all of the previous ``n_iter_no_change`` numbers of iterations.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='Tolerance for the early stopping. When the loss is not improving by at least tol for ``n_iter_no_change`` iterations (if set to a number), the training stops.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. 
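Taken together, `validation_fraction`, `n_iter_no_change`, and `tol` above expose sklearn's early-stopping contract; enabling it is a single override (a sketch, same `Hyperparams` assumption as above):

    hp = Hyperparams.defaults().replace({
        'n_iter_no_change': 5,        # the 'int' branch of the Union enables early stopping
        'validation_fraction': 0.1,   # share of training data held out for the check
        'tol': 1e-4,                  # minimum loss improvement that still counts
    })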
If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking, set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKGradientBoostingRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn GradientBoostingRegressor - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.GRADIENT_BOOSTING, ], - "name": "sklearn.ensemble.gradient_boosting.GradientBoostingRegressor", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.gradient_boosting.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html']}, - "version": "2019.11.13", - "id": "2a031907-6b2c-3390-b365-921f89c8816a", - "hyperparams_to_tune": ['n_estimators', 'learning_rate', 'max_depth', 'min_samples_leaf', 'min_samples_split', 'max_features'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - self._clf = GradientBoostingRegressor( - loss=self.hyperparams['loss']['choice'], - alpha=self.hyperparams['loss'].get('alpha', 0.9), - learning_rate=self.hyperparams['learning_rate'], - n_estimators=self.hyperparams['n_estimators'], - max_depth=self.hyperparams['max_depth'], - criterion=self.hyperparams['criterion'], - min_samples_split=self.hyperparams['min_samples_split'], - min_samples_leaf=self.hyperparams['min_samples_leaf'], - min_weight_fraction_leaf=self.hyperparams['min_weight_fraction_leaf'], - subsample=self.hyperparams['subsample'], - max_features=self.hyperparams['max_features'], - max_leaf_nodes=self.hyperparams['max_leaf_nodes'], - min_impurity_decrease=self.hyperparams['min_impurity_decrease'], - warm_start=self.hyperparams['warm_start'], - presort=self.hyperparams['presort'], - validation_fraction=self.hyperparams['validation_fraction'], - n_iter_no_change=self.hyperparams['n_iter_no_change'], - tol=self.hyperparams['tol'], - verbose=_verbose, - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *,
inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - oob_improvement_=None, - train_score_=None, - loss_=None, - estimators_=None, - n_features_=None, - init_=None, - max_features_=None, - n_classes_=None, - _rng=None, - n_estimators_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - oob_improvement_=getattr(self._clf, 'oob_improvement_', None), - train_score_=getattr(self._clf, 'train_score_', None), - loss_=getattr(self._clf, 'loss_', None), - estimators_=getattr(self._clf, 'estimators_', None), - n_features_=getattr(self._clf, 'n_features_', None), - init_=getattr(self._clf, 'init_', None), - max_features_=getattr(self._clf, 'max_features_', None), - n_classes_=getattr(self._clf, 'n_classes_', None), - _rng=getattr(self._clf, '_rng', 
None), - n_estimators_=getattr(self._clf, 'n_estimators_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.oob_improvement_ = params['oob_improvement_'] - self._clf.train_score_ = params['train_score_'] - self._clf.loss_ = params['loss_'] - self._clf.estimators_ = params['estimators_'] - self._clf.n_features_ = params['n_features_'] - self._clf.init_ = params['init_'] - self._clf.max_features_ = params['max_features_'] - self._clf.n_classes_ = params['n_classes_'] - self._clf._rng = params['_rng'] - self._clf.n_estimators_ = params['n_estimators_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['oob_improvement_'] is not None: - self._fitted = True - if params['train_score_'] is not None: - self._fitted = True - if params['loss_'] is not None: - self._fitted = True - if params['estimators_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['init_'] is not None: - self._fitted = True - if params['max_features_'] is not None: - self._fitted = True - if params['n_classes_'] is not None: - self._fitted = True - if params['_rng'] is not None: - self._fitted = True - if params['n_estimators_'] is not None: - self._fitted = True - - - - - - def produce_feature_importances(self, *, timeout: float = None, iterations: int = None) -> CallResult[d3m_dataframe]: - output = d3m_dataframe(self._clf.feature_importances_.reshape((1, len(self._input_column_names)))) - output.columns = self._input_column_names - for i in range(len(self._input_column_names)): - output.metadata = output.metadata.update_column(i, {"name": self._input_column_names[i]}) - return CallResult(output) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column 
metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKGradientBoostingRegressor.__doc__ = GradientBoostingRegressor.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKImputer.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKImputer.py deleted file mode 100644 index 203a3ca..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKImputer.py +++ /dev/null @@ -1,391 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.impute import SimpleImputer -from sklearn.impute._base import _get_mask - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - statistics_: Optional[ndarray] - 
indicator_: Optional[sklearn.base.BaseEstimator] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - missing_values = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Hyperparameter[int]( - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'float': hyperparams.Hyperparameter[float]( - default=numpy.nan, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='float', - description='The placeholder for the missing values. All occurrences of `missing_values` will be imputed.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - strategy = hyperparams.Enumeration[str]( - default='mean', - values=['median', 'most_frequent', 'mean', 'constant'], - description='The imputation strategy. - If "mean", then replace missing values using the mean along each column. Can only be used with numeric data. - If "median", then replace missing values using the median along each column. Can only be used with numeric data. - If "most_frequent", then replace missing using the most frequent value along each column. Can be used with strings or numeric data. - If "constant", then replace missing values with fill_value. Can be used with strings or numeric data. .. versionadded:: 0.20 strategy="constant" for fixed value imputation.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - add_indicator = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fill_value = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Hyperparameter[int]( - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='When strategy == "constant", fill_value is used to replace all occurrences of missing_values. If left to the default, fill_value will be 0 when imputing numerical data and "missing_value" for strings or object data types.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
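With the `strategy` and `fill_value` hyperparams above, constant-value imputation is configured as follows (a sketch, assuming the standard d3m `defaults()`/`replace()` hyperparams API):

    hp = Hyperparams.defaults().replace({
        'strategy': 'constant',   # replace every missing value with fill_value
        'fill_value': 0,          # the 'int' branch of the fill_value Union
    })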
This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking, set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKImputer(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn SimpleImputer - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.IMPUTATION, ], - "name": "sklearn.impute.SimpleImputer", - "primitive_family": metadata_base.PrimitiveFamily.DATA_CLEANING, - "python_path": "d3m.primitives.data_cleaning.imputer.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html']}, - "version": "2019.11.13", - "id": "d016df89-de62-3c53-87ed-c06bb6a23cde", - "hyperparams_to_tune": ['strategy'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - self._clf = SimpleImputer( - missing_values=self.hyperparams['missing_values'], - strategy=self.hyperparams['strategy'], - add_indicator=self.hyperparams['add_indicator'], - fill_value=self.hyperparams['fill_value'], - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs -
self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices, _ = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use, _ = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.transform(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - target_columns_metadata = self._copy_columns_metadata(inputs.metadata, self._training_indices, self.hyperparams) - output = self._wrap_predictions(inputs, sk_output, target_columns_metadata) - - output.columns = [inputs.columns[idx] for idx in range(len(inputs.columns)) if idx in self._training_indices] - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - _, _, dropped_cols = self._get_columns_to_fit(inputs, self.hyperparams) - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices + dropped_cols, - columns_list=output) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - statistics_=None, - indicator_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - statistics_=getattr(self._clf, 'statistics_', None), - indicator_=getattr(self._clf, 'indicator_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.statistics_ = params['statistics_'] - self._clf.indicator_ = params['indicator_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['statistics_'] is not None: - self._fitted = True - if params['indicator_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - - if not hyperparams['use_semantic_types']: - columns_to_produce = list(range(len(inputs.columns))) - - else: - inputs_metadata = inputs.metadata - - def 
can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - - columns_to_drop = cls._get_columns_to_drop(inputs, columns_to_produce, hyperparams) - for col in columns_to_drop: - columns_to_produce.remove(col) - - return inputs.iloc[:, columns_to_produce], columns_to_produce, columns_to_drop - - @classmethod - def _get_columns_to_drop(cls, inputs: Inputs, column_indices: List[int], hyperparams: Hyperparams): - """ - Check for columns that contain missing_values that need to be imputed. - If strategy is "constant" and missing_values is NaN, then all-NaN columns will not be dropped. - :param inputs: - :param column_indices: - :return: - """ - columns_to_remove = [] - if hyperparams['strategy'] != "constant": - for _, col in enumerate(column_indices): - inp = inputs.iloc[:, [col]].values - mask = _get_mask(inp, hyperparams['missing_values']) - if mask.all(): - columns_to_remove.append(col) - return columns_to_remove - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets.
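    # Standalone sketch of the rewrite below: for the imputer nothing is
    # removed and only the configured return_semantic_type is added; note
    # that add_semantic_types must be a set (a list has no .add method).
    attribute = 'https://metadata.datadrivendiscovery.org/types/Attribute'
    semantic_types = {'http://schema.org/Float'}        # toy existing column types
    add_semantic_types = set()
    add_semantic_types.add(attribute)                   # hyperparams['return_semantic_type']
    semantic_types = (semantic_types - set()) | add_semantic_types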
- @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray, target_columns_metadata) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - - @classmethod - def _copy_columns_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_indices, hyperparams) -> List[OrderedDict]: - outputs_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in column_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKImputer.__doc__ = SimpleImputer.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKKNeighborsClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKKNeighborsClassifier.py deleted file mode 100644 index 75d5f2f..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKKNeighborsClassifier.py +++ /dev/null @@ -1,497 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.neighbors.classification import KNeighborsClassifier - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin
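The get_params/set_params pair in these wrappers exists so that fitted sklearn state survives the typed Params serialization: fitted attributes are read off the estimator with getattr (defaulting to None when unfitted) and later written back onto a fresh estimator. A minimal sketch of that round-trip on a bare SimpleImputer, outside the D3M machinery; the construction and values are illustrative, and depending on the sklearn version transform() may consult more private attributes than the two snapshotted here:

import numpy as np
from sklearn.impute import SimpleImputer

# Snapshot fitted attributes, defaulting to None, as get_params does.
clf = SimpleImputer(strategy="mean")
clf.fit(np.array([[1.0, np.nan], [3.0, 4.0]]))
params = {
    "statistics_": getattr(clf, "statistics_", None),
    "indicator_": getattr(clf, "indicator_", None),
}

# Restore onto a fresh estimator, as set_params does; the wrapper then
# treats the primitive as fitted when any snapshotted attribute is not None.
restored = SimpleImputer(strategy="mean")
restored.statistics_ = params["statistics_"]
restored.indicator_ = params["indicator_"]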
-from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - _fit_method: Optional[str] - _fit_X: Optional[ndarray] - _tree: Optional[object] - classes_: Optional[ndarray] - _y: Optional[ndarray] - outputs_2d_: Optional[bool] - effective_metric_: Optional[str] - effective_metric_params_: Optional[Dict] - radius: Optional[float] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_neighbors = hyperparams.Bounded[int]( - default=5, - lower=0, - upper=None, - description='Number of neighbors to use by default for :meth:`k_neighbors` queries.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - weights = hyperparams.Enumeration[str]( - values=['uniform', 'distance'], - default='uniform', - description='weight function used in prediction. Possible values: - \'uniform\' : uniform weights. All points in each neighborhood are weighted equally. - \'distance\' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - algorithm = hyperparams.Enumeration[str]( - values=['auto', 'ball_tree', 'kd_tree', 'brute'], - default='auto', - description='Algorithm used to compute the nearest neighbors: - \'ball_tree\' will use :class:`BallTree` - \'kd_tree\' will use :class:`KDTree` - \'brute\' will use a brute-force search. - \'auto\' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - leaf_size = hyperparams.Bounded[int]( - default=30, - lower=0, - upper=None, - description='Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter', 'https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - metric = hyperparams.Enumeration[str]( - values=['euclidean', 'manhattan', 'chebyshev', 'minkowski', 'wminkowski', 'seuclidean', 'mahalanobis'], - default='minkowski', - description='the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of the DistanceMetric class for a list of available metrics.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - p = hyperparams.Enumeration[int]( - values=[1, 2], - default=2, - description='Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of parallel jobs to run for neighbors search. If ``-1``, then the number of jobs is set to the number of CPU cores. Doesn\'t affect :meth:`fit` method.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKKNeighborsClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn KNeighborsClassifier - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.K_NEAREST_NEIGHBORS, ], - "name": "sklearn.neighbors.classification.KNeighborsClassifier", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.k_neighbors.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html']}, - "version": "2019.11.13", - "id": "754f7210-a0b7-3b7a-8c98-f43c7b663d28", - "hyperparams_to_tune": ['n_neighbors', 'p'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = KNeighborsClassifier( - n_neighbors=self.hyperparams['n_neighbors'], - weights=self.hyperparams['weights'], - algorithm=self.hyperparams['algorithm'], - leaf_size=self.hyperparams['leaf_size'], - metric=self.hyperparams['metric'], - p=self.hyperparams['p'], - n_jobs=self.hyperparams['n_jobs'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if 
len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - _fit_method=None, - _fit_X=None, - _tree=None, - classes_=None, - _y=None, - outputs_2d_=None, - effective_metric_=None, - effective_metric_params_=None, - radius=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - _fit_method=getattr(self._clf, '_fit_method', None), - _fit_X=getattr(self._clf, '_fit_X', None), - _tree=getattr(self._clf, '_tree', None), - classes_=getattr(self._clf, 'classes_', None), - _y=getattr(self._clf, '_y', None), - outputs_2d_=getattr(self._clf, 'outputs_2d_', None), - effective_metric_=getattr(self._clf, 'effective_metric_', None), - effective_metric_params_=getattr(self._clf, 'effective_metric_params_', None), - radius=getattr(self._clf, 'radius', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf._fit_method = params['_fit_method'] - self._clf._fit_X = params['_fit_X'] - self._clf._tree = params['_tree'] - self._clf.classes_ = params['classes_'] - self._clf._y = params['_y'] - self._clf.outputs_2d_ = params['outputs_2d_'] - self._clf.effective_metric_ = params['effective_metric_'] - self._clf.effective_metric_params_ = params['effective_metric_params_'] - self._clf.radius = params['radius'] - self._input_column_names = 
params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['_fit_method'] is not None: - self._fitted = True - if params['_fit_X'] is not None: - self._fitted = True - if params['_tree'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['_y'] is not None: - self._fitted = True - if params['outputs_2d_'] is not None: - self._fitted = True - if params['effective_metric_'] is not None: - self._fitted = True - if params['effective_metric_params_'] is not None: - self._fitted = True - if params['radius'] is not None: - self._fitted = True - - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.values # Get ndarray - outputs = outputs.values - return CallResult(numpy.log(self._clf.predict_proba(inputs)[:, outputs])) - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, 
target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKKNeighborsClassifier.__doc__ = KNeighborsClassifier.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKKNeighborsRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKKNeighborsRegressor.py deleted file mode 100644 index 38b4469..0000000 --- 
a/common-primitives/sklearn-wrap/sklearn_wrap/SKKNeighborsRegressor.py +++ /dev/null @@ -1,475 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.neighbors.regression import KNeighborsRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - _fit_method: Optional[str] - _fit_X: Optional[ndarray] - _tree: Optional[object] - _y: Optional[ndarray] - effective_metric_: Optional[str] - effective_metric_params_: Optional[Dict] - radius: Optional[float] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_neighbors = hyperparams.Bounded[int]( - default=5, - lower=0, - upper=None, - description='Number of neighbors to use by default for :meth:`k_neighbors` queries.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - weights = hyperparams.Enumeration[str]( - values=['uniform', 'distance'], - default='uniform', - description='weight function used in prediction. Possible values: - \'uniform\' : uniform weights. All points in each neighborhood are weighted equally. - \'distance\' : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away. - [callable] : a user-defined function which accepts an array of distances, and returns an array of the same shape containing the weights. Uniform weights are used by default.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - algorithm = hyperparams.Enumeration[str]( - values=['auto', 'ball_tree', 'kd_tree', 'brute'], - default='auto', - description='Algorithm used to compute the nearest neighbors: - \'ball_tree\' will use :class:`BallTree` - \'kd_tree\' will use :class:`KDtree` - \'brute\' will use a brute-force search. - \'auto\' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. Note: fitting on sparse input will override the setting of this parameter, using brute force.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - leaf_size = hyperparams.Bounded[int]( - default=30, - lower=0, - upper=None, - description='Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. 
The optimal value depends on the nature of the problem.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter', 'https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - metric = hyperparams.Constant( - default='minkowski', - description='the distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean metric. See the documentation of the DistanceMetric class for a list of available metrics.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - p = hyperparams.Enumeration[int]( - values=[1, 2], - default=2, - description='Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of parallel jobs to run for neighbors search. If ``-1``, then the number of jobs is set to the number of CPU cores. Doesn\'t affect :meth:`fit` method.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKKNeighborsRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn KNeighborsRegressor - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.K_NEAREST_NEIGHBORS, ], - "name": "sklearn.neighbors.regression.KNeighborsRegressor", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.k_neighbors.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html']}, - "version": "2019.11.13", - "id": "50b499a5-cef8-3028-8a99-ae553819f855", - "hyperparams_to_tune": ['n_neighbors', 'p'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = KNeighborsRegressor( - n_neighbors=self.hyperparams['n_neighbors'], - weights=self.hyperparams['weights'], - algorithm=self.hyperparams['algorithm'], - leaf_size=self.hyperparams['leaf_size'], - metric=self.hyperparams['metric'], - p=self.hyperparams['p'], - n_jobs=self.hyperparams['n_jobs'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: 
List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - _fit_method=None, - _fit_X=None, - _tree=None, - _y=None, - effective_metric_=None, - effective_metric_params_=None, - radius=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - _fit_method=getattr(self._clf, '_fit_method', None), - _fit_X=getattr(self._clf, '_fit_X', None), - _tree=getattr(self._clf, '_tree', None), - _y=getattr(self._clf, '_y', None), - effective_metric_=getattr(self._clf, 'effective_metric_', None), - effective_metric_params_=getattr(self._clf, 'effective_metric_params_', None), - radius=getattr(self._clf, 'radius', 
None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf._fit_method = params['_fit_method'] - self._clf._fit_X = params['_fit_X'] - self._clf._tree = params['_tree'] - self._clf._y = params['_y'] - self._clf.effective_metric_ = params['effective_metric_'] - self._clf.effective_metric_params_ = params['effective_metric_params_'] - self._clf.radius = params['radius'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['_fit_method'] is not None: - self._fitted = True - if params['_fit_X'] is not None: - self._fitted = True - if params['_tree'] is not None: - self._fitted = True - if params['_y'] is not None: - self._fitted = True - if params['effective_metric_'] is not None: - self._fitted = True - if params['effective_metric_params_'] is not None: - self._fitted = True - if params['radius'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types 
found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKKNeighborsRegressor.__doc__ = KNeighborsRegressor.__doc__ \ No newline at end of file diff --git 
a/common-primitives/sklearn-wrap/sklearn_wrap/SKKernelPCA.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKKernelPCA.py deleted file mode 100644 index 0c7fb57..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKKernelPCA.py +++ /dev/null @@ -1,536 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.decomposition.kernel_pca import KernelPCA - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - lambdas_: Optional[ndarray] - alphas_: Optional[ndarray] - dual_coef_: Optional[ndarray] - X_fit_: Optional[ndarray] - _centerer: Optional[sklearn.base.BaseEstimator] - X_transformed_fit_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_components = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=100, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - description='All non-zero components are kept.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Number of components. 
If None, all non-zero components are kept.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - kernel = hyperparams.Choice( - choices={ - 'linear': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'poly': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'degree': hyperparams.Bounded[int]( - default=3, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=1.0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - description='Equals 1/n_features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'coef0': hyperparams.Constant( - default=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'rbf': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=1.0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - description='Equals 1/n_features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'sigmoid': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=1.0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - description='Equals 1/n_features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'coef0': hyperparams.Constant( - default=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'precomputed': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ) - }, - default='rbf', - description='Kernel. Default="linear".', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_inverse_transform = hyperparams.UniformBool( - default=False, - description='Learn the inverse transform for non-precomputed kernels. (i.e. learn to find the pre-image of a point)', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - alpha = hyperparams.Constant( - default=1, - description='Hyperparameter of the ridge regression that learns the inverse transform (when fit_inverse_transform=True).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - eigen_solver = hyperparams.Enumeration[str]( - default='auto', - values=['auto', 'dense', 'arpack'], - description='Select eigensolver to use. 
If n_components is much less than the number of training samples, arpack may be more efficient than the dense eigensolver.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0, - lower=0, - upper=None, - description='Convergence tolerance for arpack. If 0, optimal value will be chosen by arpack.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=4, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - description='Optimal value is chosen by arpack.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Maximum number of iterations for arpack. If None, optimal value will be chosen by arpack.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - remove_zero_eig = hyperparams.UniformBool( - default=False, - description='If True, then all components with zero eigenvalues are removed, so that the number of components in the output may be < n_components (and sometimes even zero due to numerical instability). When n_components is None, this parameter is ignored and components with zero eigenvalues are removed regardless.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of parallel jobs to run. If `-1`, then the number of jobs is set to the number of CPU cores. .. versionadded:: 0.18 copy_X : boolean, default=True If True, input X is copied and stored by the model in the `X_fit_` attribute. If no further changes will be done to X, setting `copy_X=False` saves memory by storing a reference. .. versionadded:: 0.18', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKKernelPCA(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn KernelPCA - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.PRINCIPAL_COMPONENT_ANALYSIS, ], - "name": "sklearn.decomposition.kernel_pca.KernelPCA", - "primitive_family": metadata_base.PrimitiveFamily.FEATURE_EXTRACTION, - "python_path": "d3m.primitives.feature_extraction.kernel_pca.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.KernelPCA.html']}, - "version": "2019.11.13", - "id": "fec6eba2-4a1b-3ea9-a31f-1da371941ede", - "hyperparams_to_tune": ['n_components', 'kernel', 'alpha'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = KernelPCA( - n_components=self.hyperparams['n_components'], - kernel=self.hyperparams['kernel']['choice'], - degree=self.hyperparams['kernel'].get('degree', 3), - gamma=self.hyperparams['kernel'].get('gamma', 'none'), - coef0=self.hyperparams['kernel'].get('coef0', 1), - fit_inverse_transform=self.hyperparams['fit_inverse_transform'], - alpha=self.hyperparams['alpha'], - eigen_solver=self.hyperparams['eigen_solver'], - tol=self.hyperparams['tol'], - max_iter=self.hyperparams['max_iter'], - remove_zero_eig=self.hyperparams['remove_zero_eig'], - n_jobs=self.hyperparams['n_jobs'], - 
random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - lambdas_=None, - alphas_=None, - dual_coef_=None, - X_fit_=None, - _centerer=None, - X_transformed_fit_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - lambdas_=getattr(self._clf, 'lambdas_', None), - alphas_=getattr(self._clf, 'alphas_', None), - dual_coef_=getattr(self._clf, 'dual_coef_', None), - X_fit_=getattr(self._clf, 'X_fit_', None), - _centerer=getattr(self._clf, '_centerer', None), - X_transformed_fit_=getattr(self._clf, 'X_transformed_fit_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.lambdas_ = params['lambdas_'] - self._clf.alphas_ = params['alphas_'] - self._clf.dual_coef_ = params['dual_coef_'] - self._clf.X_fit_ = params['X_fit_'] - self._clf._centerer = params['_centerer'] - self._clf.X_transformed_fit_ = params['X_transformed_fit_'] - 
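# --- Editor's note (added; not part of the original patch): get_params() and
# set_params() exist so that a fitted primitive can be pickled and restored:
# get_params() snapshots the fitted sklearn attributes (lambdas_, alphas_,
# dual_coef_, X_fit_, ...) and set_params() writes them back onto a fresh
# KernelPCA instance. A minimal round-trip sketch, assuming `df` stands for
# any numeric d3m DataFrame (the name is illustrative):
def _example_params_roundtrip(df):
    fitted = SKKernelPCA(hyperparams=Hyperparams.defaults())
    fitted.set_training_data(inputs=df)
    fitted.fit()
    clone = SKKernelPCA(hyperparams=Hyperparams.defaults())
    clone.set_params(params=fitted.get_params())
    return clone.produce(inputs=df).value  # same components as fitted.produce()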
self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['lambdas_'] is not None: - self._fitted = True - if params['alphas_'] is not None: - self._fitted = True - if params['dual_coef_'] is not None: - self._fitted = True - if params['X_fit_'] is not None: - self._fitted = True - if params['_centerer'] is not None: - self._fitted = True - if params['X_transformed_fit_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
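# (Editor's gloss, added for clarity: in this unsupervised wrapper nothing is
# actually removed, since semantic_types_to_remove stays empty; the type
# chosen via the return_semantic_type hyperparam is unioned in, so downstream
# pipeline steps can select the generated columns by semantic type.)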
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than input indices, add the return semantic type to all remaining columns - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKKernelPCA.__doc__ = KernelPCA.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKKernelRidge.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKKernelRidge.py deleted file mode 100644 index a8b12ee..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKKernelRidge.py +++ /dev/null @@ -1,491 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import 
sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.kernel_ridge import KernelRidge - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - dual_coef_: Optional[ndarray] - X_fit_: Optional[Union[ndarray, sparse.spmatrix]] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - alpha = hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - description='Small positive values of alpha improve the conditioning of the problem and reduce the variance of the estimates. Alpha corresponds to ``(2*C)^-1`` in other linear models such as LogisticRegression or LinearSVC. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - kernel = hyperparams.Choice( - choices={ - 'linear': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'poly': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'degree': hyperparams.Bounded[float]( - default=3, - lower=0, - upper=None, - description='Degree of the polynomial kernel. Ignored by other kernels.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'gamma': hyperparams.Bounded[float]( - default=0, - lower=0, - upper=None, - description='Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'coef0': hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - description='Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels classes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'rbf': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Bounded[float]( - default=0, - lower=0, - upper=None, - description='Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'sigmoid': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Bounded[float]( - default=0, - lower=0, - upper=None, - description='Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. 
Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'coef0': hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - description='Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels classes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'additive_chi2': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'chi2': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Bounded[float]( - default=0, - lower=0, - upper=None, - description='Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'laplacian': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Bounded[float]( - default=0, - lower=0, - upper=None, - description='Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'cosine': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'precomputed': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ) - }, - default='linear', - description='Kernel mapping used internally. A callable should accept two arguments and the keyword arguments passed to this object as kernel_params, and should return a floating point number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. 
Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKKernelRidge(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn KernelRidge - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.SUPPORT_VECTOR_MACHINE, ], - "name": "sklearn.kernel_ridge.KernelRidge", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.kernel_ridge.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.kernel_ridge.KernelRidge.html']}, - "version": "2019.11.13", - "id": "0fca4b96-d46b-3598-a4a5-bfa428d039fc", - "hyperparams_to_tune": ['alpha', 'kernel'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = KernelRidge( - alpha=self.hyperparams['alpha'], - kernel=self.hyperparams['kernel']['choice'], - degree=self.hyperparams['kernel'].get('degree', 3), - gamma=self.hyperparams['kernel'].get('gamma', 0), 
- coef0=self.hyperparams['kernel'].get('coef0', 1), - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - dual_coef_=None, - X_fit_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - dual_coef_=getattr(self._clf, 'dual_coef_', None), - X_fit_=getattr(self._clf, 'X_fit_', None), - input_column_names=self._input_column_names, - 
training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.dual_coef_ = params['dual_coef_'] - self._clf.X_fit_ = params['X_fit_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['dual_coef_'] is not None: - self._fitted = True - if params['X_fit_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in 
target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKKernelRidge.__doc__ = KernelRidge.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKLars.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKLars.py deleted file mode 100644 index 1136d16..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKLars.py +++ /dev/null @@ -1,460 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.least_angle import Lars - - -from d3m.container.numpy import ndarray as 
d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - alphas_: Optional[ndarray] - active_: Optional[Sequence[Any]] - coef_path_: Optional[ndarray] - coef_: Optional[ndarray] - intercept_: Optional[Union[float, ndarray]] - n_iter_: Optional[Union[int, ndarray, None]] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - fit_intercept = hyperparams.UniformBool( - default=True, - description='Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - normalize = hyperparams.UniformBool( - default=True, - description='This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - precompute = hyperparams.Union( - configuration=OrderedDict({ - 'bool': hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - description='Whether to use a precomputed Gram matrix to speed up calculations. If set to ``\'auto\'`` let us decide. The Gram matrix can also be passed as argument.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - n_nonzero_coefs = hyperparams.Bounded[int]( - default=500, - lower=0, - upper=None, - description='Target number of non-zero coefficients. Use ``np.inf`` for no limit.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - eps = hyperparams.Bounded[float]( - default=numpy.finfo(numpy.float).eps, - lower=0, - upper=None, - description='The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative optimization-based algorithms, this parameter does not control the tolerance of the optimization. 
', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_path = hyperparams.UniformBool( - default=True, - description='If True, the full path is stored in the ``coef_path_`` attribute. If you compute the solution for a large problem or many targets, setting ``fit_path`` to ``False`` will lead to a speedup, especially with a small alpha.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKLars(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn Lars - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.LINEAR_REGRESSION, ], - "name": "sklearn.linear_model.least_angle.Lars", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.lars.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lars.html']}, - "version": "2019.11.13", - "id": "989a40cd-114c-309d-9a94-59d2669d6c94", - "hyperparams_to_tune": ['eps'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = Lars( - fit_intercept=self.hyperparams['fit_intercept'], - normalize=self.hyperparams['normalize'], - precompute=self.hyperparams['precompute'], - n_nonzero_coefs=self.hyperparams['n_nonzero_coefs'], - eps=self.hyperparams['eps'], - fit_path=self.hyperparams['fit_path'], - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = 
self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - alphas_=None, - active_=None, - coef_path_=None, - coef_=None, - intercept_=None, - n_iter_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - alphas_=getattr(self._clf, 'alphas_', None), - active_=getattr(self._clf, 'active_', None), - coef_path_=getattr(self._clf, 'coef_path_', None), - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.alphas_ = params['alphas_'] - self._clf.active_ = params['active_'] - self._clf.coef_path_ = params['coef_path_'] - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.n_iter_ = params['n_iter_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['alphas_'] is not None: - self._fitted = True - if params['active_'] is not None: - self._fitted = True - if params['coef_path_'] is not None: - self._fitted = True - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not 
None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
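# (Editor's gloss, added for clarity: the block below strips TrueTarget and
# SuggestedTarget from each target column and adds PredictedTarget together
# with the return_semantic_type hyperparam; this is how D3M pipelines mark
# prediction columns in output metadata.)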
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKLars.__doc__ = Lars.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKLasso.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKLasso.py deleted file mode 100644 index 028f7f7..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKLasso.py +++ /dev/null @@ -1,474 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.coordinate_descent import Lasso - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - 
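# --- Editor's note (added; not part of the original patch): the supervised
# wrappers in these files all share one call pattern. A minimal sketch using
# the SKLasso primitive defined below, assuming `features` and `targets` are
# numeric d3m DataFrames (both names are illustrative):
def _example_supervised_flow(features, targets):
    model = SKLasso(hyperparams=Hyperparams.defaults())
    model.set_training_data(inputs=features, outputs=targets)
    model.fit()                                  # returns CallResult(None)
    return model.produce(inputs=features).value  # DataFrame of predictions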
-class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[Union[float, ndarray]] - n_iter_: Optional[int] - dual_gap_: Optional[float] - l1_ratio: Optional[float] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - alpha = hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - description='Constant that multiplies the L1 term. Defaults to 1.0. ``alpha = 0`` is equivalent to an ordinary least square, solved by the :class:`LinearRegression` object. For numerical reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised. Given this, you should use the :class:`LinearRegression` object.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - normalize = hyperparams.UniformBool( - default=False, - description='This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - precompute = hyperparams.Union( - configuration=OrderedDict({ - 'bool': hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='bool', - description='Whether to use a precomputed Gram matrix to speed up calculations. If set to ``\'auto\'`` let us decide. The Gram matrix can also be passed as argument. For sparse input this option is always ``True`` to preserve sparsity. 
', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - max_iter = hyperparams.Bounded[int]( - default=1000, - lower=0, - upper=None, - description='The maximum number of iterations.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - positive = hyperparams.UniformBool( - default=False, - description='When set to ``True``, forces the coefficients to be positive.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - selection = hyperparams.Enumeration[str]( - default='cyclic', - values=['cyclic', 'random'], - description='If set to \'random\', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to \'random\') often leads to significantly faster convergence, especially when tol is higher than 1e-4.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKLasso(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn Lasso - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.LASSO, ], - "name": "sklearn.linear_model.coordinate_descent.Lasso", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.lasso.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html']}, - "version": "2019.11.13", - "id": "a7100c7d-8d8e-3f2a-a0ee-b4380383ed6c", - "hyperparams_to_tune": ['alpha', 'max_iter'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = Lasso( - alpha=self.hyperparams['alpha'], - fit_intercept=self.hyperparams['fit_intercept'], - normalize=self.hyperparams['normalize'], - precompute=self.hyperparams['precompute'], - max_iter=self.hyperparams['max_iter'], - tol=self.hyperparams['tol'], - warm_start=self.hyperparams['warm_start'], - positive=self.hyperparams['positive'], - selection=self.hyperparams['selection'], - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - 
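# A minimal usage sketch of this wrapper through the d3m primitive interface
# (a hedged illustration: assumes d3m and sklearn_wrap are installed, and that
# train_X, train_y, test_X are hypothetical d3m container DataFrames).
from sklearn_wrap.SKLasso import SKLasso, Hyperparams

hp = Hyperparams.defaults().replace({'alpha': 0.5, 'max_iter': 5000})
primitive = SKLasso(hyperparams=hp)
primitive.set_training_data(inputs=train_X, outputs=train_y)
primitive.fit()
predictions = primitive.produce(inputs=test_X).value  # a d3m DataFrame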
self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - n_iter_=None, - dual_gap_=None, - l1_ratio=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - dual_gap_=getattr(self._clf, 'dual_gap_', None), - l1_ratio=getattr(self._clf, 'l1_ratio', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - 
target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.n_iter_ = params['n_iter_'] - self._clf.dual_gap_ = params['dual_gap_'] - self._clf.l1_ratio = params['l1_ratio'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - if params['dual_gap_'] is not None: - self._fitted = True - if params['l1_ratio'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 
-                                                                                   'use_outputs_columns'],
-                                                                               exclude_columns=
-                                                                               hyperparams[
-                                                                                   'exclude_outputs_columns'],
-                                                                               can_use_column=can_produce_column)
-        targets = []
-        if target_column_indices:
-            targets = data.select_columns(target_column_indices)
-        target_column_names = []
-        for idx in target_column_indices:
-            target_column_names.append(data.columns[idx])
-        return targets, target_column_names, target_column_indices
-
-    @classmethod
-    def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]:
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict(outputs_metadata.query_column(column_index))
-
-            # Update semantic types and prepare it for predicted targets.
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",])
-            add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",])
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            column_metadata.pop("structural_type", None)
-            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)
-
-        return outputs_metadata
-
-    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
-        outputs = d3m_dataframe(predictions, generate_metadata=False)
-        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata)
-        return outputs
-
-
-    @classmethod
-    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata):
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict()
-            semantic_types = []
-            semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget')
-            column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name")
-            if column_name is None:
-                column_name = "output_{}".format(column_index)
-            column_metadata["semantic_types"] = semantic_types
-            column_metadata["name"] = str(column_name)
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-
-SKLasso.__doc__ = Lasso.__doc__
\ No newline at end of file
diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKLassoCV.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKLassoCV.py
deleted file mode 100644
index 5c53829..0000000
--- a/common-primitives/sklearn-wrap/sklearn_wrap/SKLassoCV.py
+++ /dev/null
@@ -1,526 +0,0 @@
-from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
-from numpy import ndarray
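# Background sketch of the estimator wrapped below, in plain scikit-learn:
# LassoCV picks alpha by cross-validation over a generated path of n_alphas
# candidate values whose span is set by eps, mirroring the eps, n_alphas and
# cv hyperparameters defined further down.
import numpy
from sklearn.linear_model import LassoCV

rng = numpy.random.RandomState(0)
X = rng.normal(size=(100, 5))
y = 2.0 * X[:, 0] + 0.1 * rng.normal(size=100)
model = LassoCV(eps=0.001, n_alphas=100, cv=5).fit(X, y)
print(model.alpha_)  # regularization strength chosen by cross-validation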
-from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.coordinate_descent import LassoCV - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - alpha_: Optional[float] - coef_: Optional[ndarray] - intercept_: Optional[float] - mse_path_: Optional[ndarray] - alphas_: Optional[ndarray] - dual_gap_: Optional[float] - n_iter_: Optional[int] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - eps = hyperparams.Bounded[float]( - default=0.001, - lower=0, - upper=None, - description='Length of the path. ``eps=1e-3`` means that ``alpha_min / alpha_max = 1e-3``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_alphas = hyperparams.Bounded[int]( - default=100, - lower=0, - upper=None, - description='Number of alphas along the regularization path', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. data is expected to be already centered).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - normalize = hyperparams.UniformBool( - default=False, - description='This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - precompute = hyperparams.Union( - configuration=OrderedDict({ - 'auto': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'bool': hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - description='Whether to use a precomputed Gram matrix to speed up calculations. If set to ``\'auto\'`` let us decide. 
The Gram matrix can also be passed as argument.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - max_iter = hyperparams.Bounded[int]( - default=1000, - lower=0, - upper=None, - description='The maximum number of iterations', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='The tolerance for the optimization: if the updates are smaller than ``tol``, the optimization code checks the dual gap for optimality and continues until it is smaller than ``tol``. copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - cv = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=5, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='int', - description='Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross-validation, - integer, to specify the number of folds. - An object to be used as a cross-validation generator. - An iterable yielding train/test splits. For integer/None inputs, :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='Number of CPUs to use during the cross validation. If ``-1``, use all the CPUs.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - positive = hyperparams.UniformBool( - default=False, - description='If positive, restrict regression coefficients to be positive', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - selection = hyperparams.Enumeration[str]( - default='cyclic', - values=['cyclic', 'random'], - description='If set to \'random\', a random coefficient is updated every iteration rather than looping over features sequentially by default. This (setting to \'random\') often leads to significantly faster convergence especially when tol is higher than 1e-4.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. 
If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKLassoCV(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn LassoCV - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.LASSO, ], - "name": "sklearn.linear_model.coordinate_descent.LassoCV", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.lasso_cv.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoCV.html']}, - "version": "2019.11.13", - "id": "cfd0482b-d639-3d2b-b876-87f25277a088", - "hyperparams_to_tune": ['eps', 'max_iter'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = LassoCV( - eps=self.hyperparams['eps'], - n_alphas=self.hyperparams['n_alphas'], - fit_intercept=self.hyperparams['fit_intercept'], - normalize=self.hyperparams['normalize'], - precompute=self.hyperparams['precompute'], - max_iter=self.hyperparams['max_iter'], - tol=self.hyperparams['tol'], - cv=self.hyperparams['cv'], - n_jobs=self.hyperparams['n_jobs'], - positive=self.hyperparams['positive'], - selection=self.hyperparams['selection'], - verbose=_verbose, - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - 
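# Sketch of the membership test the column-selection helpers further down rely
# on: a column is usable only when every accepted semantic type appears among
# the column's own semantic types, i.e. the set difference is empty. The
# example types are illustrative values from the d3m vocabulary.
accepted_semantic_types = {"https://metadata.datadrivendiscovery.org/types/Attribute"}
column_semantic_types = {
    "https://metadata.datadrivendiscovery.org/types/Attribute",
    "http://schema.org/Float",
}
print(len(accepted_semantic_types - column_semantic_types) == 0)  # True: usable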
self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - alpha_=None, - coef_=None, - intercept_=None, - mse_path_=None, - alphas_=None, - dual_gap_=None, - n_iter_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - alpha_=getattr(self._clf, 'alpha_', None), - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - mse_path_=getattr(self._clf, 'mse_path_', None), - alphas_=getattr(self._clf, 'alphas_', None), - dual_gap_=getattr(self._clf, 'dual_gap_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.alpha_ = params['alpha_'] - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.mse_path_ = params['mse_path_'] - self._clf.alphas_ = params['alphas_'] - self._clf.dual_gap_ = params['dual_gap_'] - self._clf.n_iter_ = params['n_iter_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = 
params['target_columns_metadata_'] - - if params['alpha_'] is not None: - self._fitted = True - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['mse_path_'] is not None: - self._fitted = True - if params['alphas_'] is not None: - self._fitted = True - if params['dual_gap_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = 
outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKLassoCV.__doc__ = LassoCV.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearDiscriminantAnalysis.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearDiscriminantAnalysis.py deleted file mode 100644 index b574279..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearDiscriminantAnalysis.py +++ /dev/null @@ -1,535 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from 
d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[ndarray] - covariance_: Optional[ndarray] - explained_variance_ratio_: Optional[ndarray] - means_: Optional[ndarray] - priors_: Optional[ndarray] - scalings_: Optional[ndarray] - xbar_: Optional[ndarray] - classes_: Optional[ndarray] - _max_components: Optional[int] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - solver = hyperparams.Enumeration[str]( - default='svd', - values=['svd', 'lsqr', 'eigen'], - description='Solver to use, possible values: - \'svd\': Singular value decomposition (default). Does not compute the covariance matrix, therefore this solver is recommended for data with a large number of features. - \'lsqr\': Least squares solution, can be combined with shrinkage. - \'eigen\': Eigenvalue decomposition, can be combined with shrinkage.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - shrinkage = hyperparams.Union( - configuration=OrderedDict({ - 'string': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'float': hyperparams.Bounded[float]( - default=0, - lower=0, - upper=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Shrinkage parameter, possible values: - None: no shrinkage (default). - \'auto\': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. Note that shrinkage works only with \'lsqr\' and \'eigen\' solvers.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_components = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=0, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Number of components (< n_classes - 1) for dimensionality reduction.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='Threshold used for rank estimation in SVD solver. .. versionadded:: 0.17', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. 
If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKLinearDiscriminantAnalysis(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn LinearDiscriminantAnalysis - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.LINEAR_DISCRIMINANT_ANALYSIS, ], - "name": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.linear_discriminant_analysis.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.LinearDiscriminantAnalysis.html']}, - "version": "2019.11.13", - "id": "a323b46a-6c15-373e-91b4-20efbd65402f", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = LinearDiscriminantAnalysis( - solver=self.hyperparams['solver'], - shrinkage=self.hyperparams['shrinkage'], - n_components=self.hyperparams['n_components'], - tol=self.hyperparams['tol'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = 
self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - covariance_=None, - explained_variance_ratio_=None, - means_=None, - priors_=None, - scalings_=None, - xbar_=None, - classes_=None, - _max_components=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - covariance_=getattr(self._clf, 'covariance_', None), - explained_variance_ratio_=getattr(self._clf, 'explained_variance_ratio_', None), - means_=getattr(self._clf, 'means_', None), - priors_=getattr(self._clf, 'priors_', None), - scalings_=getattr(self._clf, 'scalings_', None), - xbar_=getattr(self._clf, 'xbar_', None), - classes_=getattr(self._clf, 'classes_', None), - _max_components=getattr(self._clf, '_max_components', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.covariance_ = params['covariance_'] - self._clf.explained_variance_ratio_ = params['explained_variance_ratio_'] - self._clf.means_ = params['means_'] - self._clf.priors_ = params['priors_'] - self._clf.scalings_ = params['scalings_'] - self._clf.xbar_ = params['xbar_'] - self._clf.classes_ = params['classes_'] - self._clf._max_components = params['_max_components'] - 
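# Sketch of the Params round trip that get_params/set_params enable (assumes a
# fitted_primitive trained as in fit() above and a hypothetical test_X d3m
# DataFrame): the learned attributes restored below mark the fresh instance as
# fitted, so it can produce without re-fitting.
from sklearn_wrap.SKLinearDiscriminantAnalysis import SKLinearDiscriminantAnalysis, Hyperparams

params = fitted_primitive.get_params()
fresh = SKLinearDiscriminantAnalysis(hyperparams=Hyperparams.defaults())
fresh.set_params(params=params)
predictions = fresh.produce(inputs=test_X).value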
self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['covariance_'] is not None: - self._fitted = True - if params['explained_variance_ratio_'] is not None: - self._fitted = True - if params['means_'] is not None: - self._fitted = True - if params['priors_'] is not None: - self._fitted = True - if params['scalings_'] is not None: - self._fitted = True - if params['xbar_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['_max_components'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. 
- # likelihood for a given class.
- log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, 
hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKLinearDiscriminantAnalysis.__doc__ = LinearDiscriminantAnalysis.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearRegression.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearRegression.py deleted file mode 100644 index 62ce474..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearRegression.py +++ /dev/null @@ -1,431 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.base import LinearRegression - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from 
d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[float] - _residues: Optional[float] - rank_: Optional[int] - singular_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - fit_intercept = hyperparams.UniformBool( - default=True, - description='whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (e.g. data is expected to be already centered).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - normalize = hyperparams.UniformBool( - default=True, - description='This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of jobs to use for the computation. This will only provide speedup for n_targets > 1 and sufficient large problems. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. 
Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKLinearRegression(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn LinearRegression - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.LINEAR_REGRESSION, ], - "name": "sklearn.linear_model.base.LinearRegression", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.linear.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html']}, - "version": "2019.11.13", - "id": "816cc0f8-8bf4-4d00-830d-272342349577", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, 
random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = LinearRegression( - fit_intercept=self.hyperparams['fit_intercept'], - normalize=self.hyperparams['normalize'], - n_jobs=self.hyperparams['n_jobs'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - _residues=None, - rank_=None, - singular_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - 
target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - _residues=getattr(self._clf, '_residues', None), - rank_=getattr(self._clf, 'rank_', None), - singular_=getattr(self._clf, 'singular_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf._residues = params['_residues'] - self._clf.rank_ = params['rank_'] - self._clf.singular_ = params['singular_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['_residues'] is not None: - self._fitted = True - if params['rank_'] is not None: - self._fitted = True - if params['singular_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if 
len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKLinearRegression.__doc__ = 
LinearRegression.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearSVC.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearSVC.py deleted file mode 100644 index 55bb114..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearSVC.py +++ /dev/null @@ -1,478 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.svm.classes import LinearSVC - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[ndarray] - classes_: Optional[ndarray] - n_iter_: Optional[numpy.int32] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - penalty = hyperparams.Enumeration[str]( - values=['l1', 'l2'], - default='l2', - description='Specifies the norm used in the penalization. The \'l2\' penalty is the standard used in SVC. The \'l1\' leads to ``coef_`` vectors that are sparse.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - loss = hyperparams.Enumeration[str]( - values=['hinge', 'squared_hinge'], - default='squared_hinge', - description='Specifies the loss function. \'hinge\' is the standard SVM loss (used e.g. by the SVC class) while \'squared_hinge\' is the square of the hinge loss.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - dual = hyperparams.UniformBool( - default=True, - description='Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples > n_features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='Tolerance for stopping criteria.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - C = hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - description='Penalty parameter C of the error term.' - ) - multi_class = hyperparams.Enumeration[str]( - values=['ovr', 'crammer_singer'], - default='ovr', - description='Determines the multi-class strategy if `y` contains more than two classes. ``"ovr"`` trains n_classes one-vs-rest classifiers, while ``"crammer_singer"`` optimizes a joint objective over all classes. While `crammer_singer` is interesting from a theoretical perspective as it is consistent, it is seldom used in practice as it rarely leads to better accuracy and is more expensive to compute. If ``"crammer_singer"`` is chosen, the options loss, penalty and dual will be ignored.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be already centered).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - intercept_scaling = hyperparams.Hyperparameter[float]( - default=1, - description='When self.fit_intercept is True, instance vector x becomes ``[x, self.intercept_scaling]``, i.e. a "synthetic" feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic feature weight. Note: the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - class_weight = hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Constant( - default='balanced', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Set the parameter C of class i to ``class_weight[i]*C`` for SVC. If not given, all classes are supposed to have weight one. The "balanced" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Bounded[int]( - default=1000, - lower=0, - upper=None, - description='The maximum number of iterations to be run.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input.
If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKLinearSVC(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn LinearSVC - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.SUPPORT_VECTOR_MACHINE, ], - "name": "sklearn.svm.classes.LinearSVC", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.linear_svc.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html']}, - "version": "2019.11.13", - "id": "71749b20-80e9-3a8e-998e-25da5bbc1abc", - "hyperparams_to_tune": ['C'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - self._clf = LinearSVC( - penalty=self.hyperparams['penalty'], - loss=self.hyperparams['loss'], - dual=self.hyperparams['dual'], - tol=self.hyperparams['tol'], - C=self.hyperparams['C'], - multi_class=self.hyperparams['multi_class'], - fit_intercept=self.hyperparams['fit_intercept'], - intercept_scaling=self.hyperparams['intercept_scaling'], - class_weight=self.hyperparams['class_weight'], - max_iter=self.hyperparams['max_iter'], - verbose=_verbose, - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names =
self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - classes_=None, - n_iter_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - classes_=getattr(self._clf, 'classes_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.classes_ = params['classes_'] - self._clf.n_iter_ = params['n_iter_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - 
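- # When 'use_semantic_types' is disabled, all input columns are used for
- # fitting as-is; otherwise only columns passing the _can_produce_column
- # check below (numeric columns carrying the Attribute semantic type) are kept.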
if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
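- # TrueTarget/SuggestedTarget markings describe training data, so they are
- # removed here; PredictedTarget plus the configured 'return_semantic_type'
- # are attached to the produced columns instead.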
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKLinearSVC.__doc__ = LinearSVC.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearSVR.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearSVR.py deleted file mode 100644 index af809b8..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKLinearSVR.py +++ /dev/null @@ -1,452 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.svm.classes import LinearSVR - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - 
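For orientation, every wrapper deleted in this patch follows the same run protocol. A minimal driver for the SKLinearSVR primitive defined below could look like the following sketch; it assumes the d3m API imported above, and train_X, train_y, and test_X are hypothetical d3m container DataFrames with the usual metadata:

    # Hedged sketch, not part of the deleted file: exercises the standard
    # primitive lifecycle (set_training_data -> fit -> produce).
    hp = Hyperparams.defaults()  # use_semantic_types defaults to False, so all columns are used
    primitive = SKLinearSVR(hyperparams=hp)
    primitive.set_training_data(inputs=train_X, outputs=train_y)
    primitive.fit()  # ravels a single-column target, then fits the underlying LinearSVR
    predictions = primitive.produce(inputs=test_X).value  # DataFrame with PredictedTarget columns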
- -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[ndarray] - n_iter_: Optional[numpy.int32] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - C = hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - description='Penalty parameter C of the error term. The penalty is a squared l2 penalty. The bigger this parameter, the less regularization is used.' - ) - loss = hyperparams.Enumeration[str]( - values=['epsilon_insensitive', 'squared_epsilon_insensitive'], - default='epsilon_insensitive', - description='Specifies the loss function. \'epsilon_insensitive\' is the epsilon-insensitive loss (standard SVR) while \'squared_epsilon_insensitive\' is the squared epsilon-insensitive loss.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - epsilon = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - description='Epsilon parameter in the epsilon-insensitive loss function. Note that the value of this parameter depends on the scale of the target variable y. If unsure, set ``epsilon=0``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - dual = hyperparams.UniformBool( - default=True, - description='Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples > n_features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='Tolerance for stopping criteria.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be already centered).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - intercept_scaling = hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - description='When self.fit_intercept is True, instance vector x becomes [x, self.intercept_scaling], i.e. a "synthetic" feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic feature weight. Note: the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Bounded[int]( - default=1000, - lower=0, - upper=None, - description='The maximum number of iterations to be run.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input.
If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKLinearSVR(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn LinearSVR - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVR.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.SUPPORT_VECTOR_MACHINE, ], - "name": "sklearn.svm.classes.LinearSVR", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.linear_svr.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVR.html']}, - "version": "2019.11.13", - "id": "f40ffdc0-1d6d-3234-8fd0-a3e4d7a136a7", - "hyperparams_to_tune": ['C'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - self._clf = LinearSVR( - C=self.hyperparams['C'], - loss=self.hyperparams['loss'], - epsilon=self.hyperparams['epsilon'], - dual=self.hyperparams['dual'], - tol=self.hyperparams['tol'], - fit_intercept=self.hyperparams['fit_intercept'], - intercept_scaling=self.hyperparams['intercept_scaling'], - max_iter=self.hyperparams['max_iter'], - verbose=_verbose, - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0:
- self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - n_iter_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.n_iter_ = params['n_iter_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - 
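- # base_utils.get_columns_to_use evaluates can_produce_column for each column,
- # honoring the explicit use_inputs_columns / exclude_inputs_columns hyperparams,
- # and returns the selected and the rejected column indices.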
columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKLinearSVR.__doc__ = LinearSVR.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKLogisticRegression.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKLogisticRegression.py deleted file mode 100644 index f5578d7..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKLogisticRegression.py +++ /dev/null @@ -1,582 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.logistic import LogisticRegression - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - 
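The column-selection rule above recurs in every wrapper in this patch: a column is usable only when its structural type is numeric and it carries every required semantic type. Restated as a standalone predicate (a sketch; column_metadata stands in for the dict returned by the metadata queries used in these files):

    import numpy

    def column_is_usable(column_metadata: dict) -> bool:
        # Numeric structural types accepted by the wrappers.
        accepted_structural_types = (int, float, numpy.integer, numpy.float64)
        # Every accepted semantic type must be present on the column.
        accepted_semantic_types = {"https://metadata.datadrivendiscovery.org/types/Attribute"}
        if not issubclass(column_metadata['structural_type'], accepted_structural_types):
            return False
        semantic_types = set(column_metadata.get('semantic_types', []))
        return bool(semantic_types) and accepted_semantic_types <= semantic_types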
-Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[ndarray] - n_iter_: Optional[ndarray] - classes_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - penalty = hyperparams.Choice( - choices={ - 'l1': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'l2': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'none': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'elasticnet': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'l1_ratio': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Uniform( - lower=0, - upper=1, - default=0.001, - lower_inclusive=True, - upper_inclusive=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='float', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ) - }, - default='l2', - description='Used to specify the norm used in the penalization. The \'newton-cg\', \'sag\' and \'lbfgs\' solvers support only l2 penalties.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - dual = hyperparams.UniformBool( - default=False, - description='Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - intercept_scaling = hyperparams.Hyperparameter[float]( - default=1, - description='Useful only when the solver \'liblinear\' is used and self.fit_intercept is set to True. In this case, x becomes [x, self.intercept_scaling], i.e. a "synthetic" feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. Note! the synthetic feature weight is subject to l1/l2 regularization as all other features. To lessen the effect of regularization on synthetic feature weight (and therefore on the intercept) intercept_scaling has to be increased.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - class_weight = hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Constant( - default='balanced', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. 
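The Params class above lists exactly the attributes scikit-learn sets on a fitted LogisticRegression, which is what lets get_params and set_params further down round-trip a trained estimator. A quick check against plain scikit-learn (toy data, liblinear solver as in this wrapper's default):

from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(solver="liblinear").fit([[0.0], [1.0], [2.0]], [0, 1, 1])
# Each of these maps onto a Params field: coef_, intercept_, classes_, n_iter_.
print(clf.coef_, clf.intercept_, clf.classes_, clf.n_iter_)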
The "balanced" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. .. versionadded:: 0.17 *class_weight=\'balanced\'* instead of deprecated *class_weight=\'auto\'*.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Bounded[int]( - default=100, - lower=0, - upper=None, - description='Useful only for the newton-cg, sag and lbfgs solvers. Maximum number of iterations taken for the solvers to converge.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - solver = hyperparams.Enumeration[str]( - values=['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'], - default='liblinear', - description='Algorithm to use in the optimization problem. - For small datasets, \'liblinear\' is a good choice, whereas \'sag\' is faster for large ones. - For multiclass problems, only \'newton-cg\', \'sag\' and \'lbfgs\' handle multinomial loss; \'liblinear\' is limited to one-versus-rest schemes. - \'newton-cg\', \'lbfgs\' and \'sag\' only handle L2 penalty. Note that \'sag\' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. .. versionadded:: 0.17 Stochastic Average Gradient descent solver.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='Tolerance for stopping criteria.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - C = hyperparams.Hyperparameter[float]( - default=1.0, - description='Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - multi_class = hyperparams.Enumeration[str]( - values=['ovr', 'multinomial'], - default='ovr', - description='Multiclass option can be either \'ovr\' or \'multinomial\'. If the option chosen is \'ovr\', then a binary problem is fit for each label. Else the loss minimised is the multinomial loss fit across the entire probability distribution. Works only for the \'newton-cg\', \'sag\' and \'lbfgs\' solver. .. versionadded:: 0.18 Stochastic Average Gradient descent solver for \'multinomial\' case.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. Useless for liblinear solver. .. 
versionadded:: 0.17 *warm_start* to support *lbfgs*, *newton-cg*, *sag* solvers.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='Number of CPU cores used during the cross-validation loop. If given a value of -1, all cores are used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
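The class_weight='balanced' rule quoted a few lines up, n_samples / (n_classes * np.bincount(y)), is easy to verify directly; a short numpy sketch with a 3:1 imbalance:

import numpy as np

y = np.array([0, 0, 0, 1])
n_samples, n_classes = len(y), len(np.unique(y))
weights = n_samples / (n_classes * np.bincount(y))
print(weights)  # [0.6667, 2.0]: the rare class is up-weighted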
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKLogisticRegression(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn LogisticRegression - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.LOGISTIC_REGRESSION, ], - "name": "sklearn.linear_model.logistic.LogisticRegression", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.logistic_regression.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html']}, - "version": "2019.11.13", - "id": "b9c81b40-8ed1-3b23-80cf-0d6fe6863962", - "hyperparams_to_tune": ['C', 'penalty'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = LogisticRegression( - penalty=self.hyperparams['penalty']['choice'], - l1_ratio=self.hyperparams['penalty'].get('l1_ratio', 'float'), - dual=self.hyperparams['dual'], - fit_intercept=self.hyperparams['fit_intercept'], - intercept_scaling=self.hyperparams['intercept_scaling'], - class_weight=self.hyperparams['class_weight'], - max_iter=self.hyperparams['max_iter'], - solver=self.hyperparams['solver'], - tol=self.hyperparams['tol'], - C=self.hyperparams['C'], - multi_class=self.hyperparams['multi_class'], - warm_start=self.hyperparams['warm_start'], - n_jobs=self.hyperparams['n_jobs'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return 
CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - n_iter_=None, - classes_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - classes_=getattr(self._clf, 'classes_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.n_iter_ = params['n_iter_'] - self._clf.classes_ = params['classes_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] 
is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. - log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 
0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
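A subtle step in the log_likelihoods method above is the class-to-index mapping: the outputs DataFrame may carry string labels, while predict_log_proba is indexed by class position. A self-contained sketch with made-up probabilities:

import numpy
import pandas

classes = numpy.array(["cat", "dog"])              # as in clf.classes_
log_proba = numpy.log([[0.9, 0.1], [0.2, 0.8]])    # two samples, two classes
truth = pandas.Series(["cat", "dog"])              # one target column

classes_map = pandas.Series(numpy.arange(len(classes)), index=classes)
mapped = truth.map(classes_map)                    # "cat" -> 0, "dog" -> 1
picked = log_proba[numpy.arange(len(truth)), mapped]
print(picked)  # log(0.9), log(0.8): likelihood of the true class per row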
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKLogisticRegression.__doc__ = LogisticRegression.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKMLPClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKMLPClassifier.py deleted file mode 100644 index c0acbcd..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKMLPClassifier.py +++ /dev/null @@ -1,730 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.neural_network.multilayer_perceptron import MLPClassifier - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import 
pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - classes_: Optional[ndarray] - loss_: Optional[float] - coefs_: Optional[Sequence[Any]] - intercepts_: Optional[Sequence[Any]] - n_iter_: Optional[int] - n_layers_: Optional[int] - n_outputs_: Optional[int] - out_activation_: Optional[str] - _best_coefs: Optional[Sequence[Any]] - _best_intercepts: Optional[Sequence[Any]] - _label_binarizer: Optional[sklearn.preprocessing.LabelBinarizer] - _no_improvement_count: Optional[int] - _random_state: Optional[numpy.random.mtrand.RandomState] - best_validation_score_: Optional[numpy.float64] - loss_curve_: Optional[Sequence[Any]] - t_: Optional[int] - _optimizer: Optional[sklearn.neural_network._stochastic_optimizers.AdamOptimizer] - validation_scores_: Optional[Sequence[Any]] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - hidden_layer_sizes = hyperparams.List( - elements=hyperparams.Bounded(1, None, 100), - default=(100, ), - min_size=1, - max_size=None, - description='The ith element represents the number of neurons in the ith hidden layer.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - activation = hyperparams.Enumeration[str]( - values=['identity', 'logistic', 'tanh', 'relu'], - default='relu', - description='Activation function for the hidden layer. - \'identity\', no-op activation, useful to implement linear bottleneck, returns f(x) = x - \'logistic\', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)). - \'tanh\', the hyperbolic tan function, returns f(x) = tanh(x). - \'relu\', the rectified linear unit function, returns f(x) = max(0, x)', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - solver = hyperparams.Choice( - choices={ - 'lbfgs': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'sgd': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'learning_rate': hyperparams.Enumeration[str]( - values=['constant', 'invscaling', 'adaptive'], - default='constant', - description='Learning rate schedule for weight updates. Only used when solver=’sgd’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'learning_rate_init': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.001, - description='The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'power_t': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.5, - description='The exponent for inverse scaling learning rate. Only used when solver=’sgd’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'shuffle': hyperparams.UniformBool( - default=True, - description='Whether to shuffle samples in each iteration. Only used when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'momentum': hyperparams.Bounded[float]( - default=0.9, - lower=0, - upper=1, - description='Momentum for gradient descent update. Should be between 0 and 1. 
Only used when solver=’sgd’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'nesterovs_momentum': hyperparams.UniformBool( - default=True, - description='Whether to use Nesterov’s momentum. Only used when solver=’sgd’ and momentum > 0.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'early_stopping': hyperparams.UniformBool( - default=False, - description='Whether to use early stopping to terminate training when validation score is not improving.If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'n_iter_no_change': hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='Maximum number of epochs to not meet tol improvement. Only effective when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'adam': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'learning_rate_init': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.001, - description='The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'shuffle': hyperparams.UniformBool( - default=True, - description='Whether to shuffle samples in each iteration. Only used when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'early_stopping': hyperparams.UniformBool( - default=False, - description='Whether to use early stopping to terminate training when validation score is not improving.If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'beta_1': hyperparams.Bounded[float]( - default=0.9, - lower=0, - upper=1, - description='Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'beta_2': hyperparams.Bounded[float]( - default=0.999, - lower=0, - upper=1, - description='Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'epsilon': hyperparams.Bounded[float]( - default=1e-08, - lower=0, - upper=None, - description='Value for numerical stability in adam. Only used when solver=’adam’', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'n_iter_no_change': hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='Maximum number of epochs to not meet tol improvement. Only effective when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ) - }, - default='adam', - description='The solver for weight optimization. - \'lbfgs\' is an optimizer in the family of quasi-Newton methods. - \'sgd\' refers to stochastic gradient descent. 
- \'adam\' refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba Note: The default solver \'adam\' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, \'lbfgs\' can converge faster and perform better.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - alpha = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.0001, - description='L2 penalty (regularization term) parameter.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - batch_size = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=16, - description='Size of minibatches for stochastic optimizers. If the solver is ‘lbfgs’, the classifier will not use minibatch', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - description='When set to “auto”, batch_size=min(200, n_samples)', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - description='Size of minibatches for stochastic optimizers. If the solver is \'lbfgs\', the classifier will not use minibatch. When set to "auto", `batch_size=min(200, n_samples)`', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=200, - description='Maximum number of iterations. The solver iterates until convergence (determined by \'tol\') or this number of iterations. For stochastic solvers (\'sgd\', \'adam\'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to \'adaptive\', convergence is considered to be reached and training stops.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - validation_fraction = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. 
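For orientation, the solver Choice above flattens into ordinary keyword arguments on the underlying estimator (the __init__ below reads them with .get plus scikit-learn's defaults). A plain scikit-learn equivalent of the wrapper's defaults; note that modern releases expose the class as sklearn.neural_network.MLPClassifier, the private multilayer_perceptron module path imported above having since been removed:

from sklearn.datasets import make_classification
from sklearn.neural_network import MLPClassifier

X, y = make_classification(n_samples=200, random_state=0)
clf = MLPClassifier(hidden_layer_sizes=(100,), activation="relu", solver="adam",
                    learning_rate_init=0.001, beta_1=0.9, beta_2=0.999,
                    max_iter=200, random_state=0)
clf.fit(X, y)
print(clf.n_layers_, clf.out_activation_)  # 3 (input/hidden/output); 'logistic' for binary targets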
If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKMLPClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn MLPClassifier - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.MULTILAYER_PERCEPTRON, ], - "name": "sklearn.neural_network.multilayer_perceptron.MLPClassifier", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.mlp.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html']}, - "version": "2019.11.13", - "id": "89d7ffbd-df5d-352f-a038-311b7d379cd0", - "hyperparams_to_tune": ['hidden_layer_sizes', 'activation', 'solver', 'alpha'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: bool = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = MLPClassifier( - hidden_layer_sizes=self.hyperparams['hidden_layer_sizes'], - activation=self.hyperparams['activation'], - solver=self.hyperparams['solver']['choice'], - learning_rate=self.hyperparams['solver'].get('learning_rate', 'constant'), - learning_rate_init=self.hyperparams['solver'].get('learning_rate_init', 0.001), - power_t=self.hyperparams['solver'].get('power_t', 0.5), - shuffle=self.hyperparams['solver'].get('shuffle', True), - momentum=self.hyperparams['solver'].get('momentum', 0.9), - nesterovs_momentum=self.hyperparams['solver'].get('nesterovs_momentum', True), - early_stopping=self.hyperparams['solver'].get('early_stopping', False), - beta_1=self.hyperparams['solver'].get('beta_1', 0.9), - beta_2=self.hyperparams['solver'].get('beta_2', 0.999), - epsilon=self.hyperparams['solver'].get('epsilon', 1e-08), - n_iter_no_change=self.hyperparams['solver'].get('n_iter_no_change', 10), - alpha=self.hyperparams['alpha'], - batch_size=self.hyperparams['batch_size'], - max_iter=self.hyperparams['max_iter'], - tol=self.hyperparams['tol'], - validation_fraction=self.hyperparams['validation_fraction'], - warm_start=self.hyperparams['warm_start'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - 
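The attributes being reset here are exactly what get_params and set_params further down serialize, using getattr with a None default to snapshot and plain setattr to restore. A rough illustration of that round-trip against plain scikit-learn (toy data; the private attributes involved vary across scikit-learn versions, so this is a sketch, not a guarantee):

from sklearn.neural_network import MLPClassifier

fitted = MLPClassifier(hidden_layer_sizes=(8,), max_iter=500, random_state=0)
fitted.fit([[0.0], [1.0]], [0, 1])

# Snapshot: missing attributes become None instead of raising AttributeError.
names = ("classes_", "coefs_", "intercepts_", "n_layers_", "n_outputs_",
         "out_activation_", "_label_binarizer", "loss_", "n_iter_", "t_")
snapshot = {n: getattr(fitted, n, None) for n in names}

# Restore onto a fresh, unfitted instance.
restored = MLPClassifier()
for n, v in snapshot.items():
    if v is not None:
        setattr(restored, n, v)
print(restored.predict([[0.9]]))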
self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - classes_=None, - loss_=None, - coefs_=None, - intercepts_=None, - n_iter_=None, - n_layers_=None, - n_outputs_=None, - out_activation_=None, - _best_coefs=None, - _best_intercepts=None, - _label_binarizer=None, - _no_improvement_count=None, - _random_state=None, - best_validation_score_=None, - loss_curve_=None, - t_=None, - _optimizer=None, - validation_scores_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - classes_=getattr(self._clf, 'classes_', None), - 
loss_=getattr(self._clf, 'loss_', None), - coefs_=getattr(self._clf, 'coefs_', None), - intercepts_=getattr(self._clf, 'intercepts_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - n_layers_=getattr(self._clf, 'n_layers_', None), - n_outputs_=getattr(self._clf, 'n_outputs_', None), - out_activation_=getattr(self._clf, 'out_activation_', None), - _best_coefs=getattr(self._clf, '_best_coefs', None), - _best_intercepts=getattr(self._clf, '_best_intercepts', None), - _label_binarizer=getattr(self._clf, '_label_binarizer', None), - _no_improvement_count=getattr(self._clf, '_no_improvement_count', None), - _random_state=getattr(self._clf, '_random_state', None), - best_validation_score_=getattr(self._clf, 'best_validation_score_', None), - loss_curve_=getattr(self._clf, 'loss_curve_', None), - t_=getattr(self._clf, 't_', None), - _optimizer=getattr(self._clf, '_optimizer', None), - validation_scores_=getattr(self._clf, 'validation_scores_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.classes_ = params['classes_'] - self._clf.loss_ = params['loss_'] - self._clf.coefs_ = params['coefs_'] - self._clf.intercepts_ = params['intercepts_'] - self._clf.n_iter_ = params['n_iter_'] - self._clf.n_layers_ = params['n_layers_'] - self._clf.n_outputs_ = params['n_outputs_'] - self._clf.out_activation_ = params['out_activation_'] - self._clf._best_coefs = params['_best_coefs'] - self._clf._best_intercepts = params['_best_intercepts'] - self._clf._label_binarizer = params['_label_binarizer'] - self._clf._no_improvement_count = params['_no_improvement_count'] - self._clf._random_state = params['_random_state'] - self._clf.best_validation_score_ = params['best_validation_score_'] - self._clf.loss_curve_ = params['loss_curve_'] - self._clf.t_ = params['t_'] - self._clf._optimizer = params['_optimizer'] - self._clf.validation_scores_ = params['validation_scores_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['classes_'] is not None: - self._fitted = True - if params['loss_'] is not None: - self._fitted = True - if params['coefs_'] is not None: - self._fitted = True - if params['intercepts_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - if params['n_layers_'] is not None: - self._fitted = True - if params['n_outputs_'] is not None: - self._fitted = True - if params['out_activation_'] is not None: - self._fitted = True - if params['_best_coefs'] is not None: - self._fitted = True - if params['_best_intercepts'] is not None: - self._fitted = True - if params['_label_binarizer'] is not None: - self._fitted = True - if params['_no_improvement_count'] is not None: - self._fitted = True - if params['_random_state'] is not None: - self._fitted = True - if params['best_validation_score_'] is not None: - self._fitted = True - if params['loss_curve_'] is not None: - self._fitted = True - if params['t_'] is not None: - self._fitted = True - if params['_optimizer'] is not None: - self._fitted = True - if params['validation_scores_'] is not None: - 
self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. - log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, 
list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - 
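_wrap_predictions above regenerates metadata from the raw predictions and then overlays the stored per-column target metadata. A compressed sketch of that flow, assuming a working d3m installation (the imports match this file's own); the column name is hypothetical:

import numpy
from d3m.container import DataFrame as d3m_dataframe
from d3m.metadata import base as metadata_base

predictions = numpy.array([[0], [1]])
outputs = d3m_dataframe(predictions, generate_metadata=False)

outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
outputs_metadata = outputs_metadata.update_column(0, {
    "name": "prediction",  # hypothetical name for the single target column
    "semantic_types": ["https://metadata.datadrivendiscovery.org/types/PredictedTarget"],
})
outputs.metadata = outputs_metadata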
semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKMLPClassifier.__doc__ = MLPClassifier.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKMLPRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKMLPRegressor.py deleted file mode 100644 index df6b0e9..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKMLPRegressor.py +++ /dev/null @@ -1,669 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.neural_network.multilayer_perceptron import MLPRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - loss_: Optional[float] - coefs_: Optional[Sequence[Any]] - intercepts_: Optional[Sequence[Any]] - n_iter_: Optional[int] - n_layers_: Optional[int] - n_outputs_: Optional[int] - out_activation_: Optional[str] - _best_coefs: Optional[Sequence[Any]] - _best_intercepts: Optional[Sequence[Any]] - _no_improvement_count: Optional[int] - _random_state: Optional[numpy.random.mtrand.RandomState] - best_validation_score_: Optional[numpy.float64] - loss_curve_: Optional[Sequence[Any]] - t_: Optional[int] - _optimizer: Optional[sklearn.neural_network._stochastic_optimizers.AdamOptimizer] - validation_scores_: Optional[Sequence[Any]] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - hidden_layer_sizes = hyperparams.List( - elements=hyperparams.Bounded(1, None, 100), - default=(100, ), - min_size=1, - max_size=None, - description='The ith element represents the number of neurons in the ith hidden layer.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - activation = hyperparams.Enumeration[str]( - values=['identity', 'logistic', 'tanh', 'relu'], - default='relu', - description='Activation function for the hidden layer. - \'identity\', no-op activation, useful to implement linear bottleneck, returns f(x) = x - \'logistic\', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)). - \'tanh\', the hyperbolic tan function, returns f(x) = tanh(x). 
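The activation choices described here (the relu case continues just below) are ordinary elementwise maps; stated in numpy:

import numpy as np

x = np.array([-2.0, 0.0, 2.0])
activations = {
    "identity": x,                           # f(x) = x
    "logistic": 1.0 / (1.0 + np.exp(-x)),    # f(x) = 1 / (1 + exp(-x))
    "tanh": np.tanh(x),                      # f(x) = tanh(x)
    "relu": np.maximum(0.0, x),              # f(x) = max(0, x)
}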
- \'relu\', the rectified linear unit function, returns f(x) = max(0, x)', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - solver = hyperparams.Choice( - choices={ - 'lbfgs': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'sgd': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'learning_rate': hyperparams.Enumeration[str]( - values=['constant', 'invscaling', 'adaptive'], - default='constant', - description='Learning rate schedule for weight updates. Only used when solver=’sgd’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'learning_rate_init': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.001, - description='The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'power_t': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.5, - description='The exponent for inverse scaling learning rate. Only used when solver=’sgd’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'shuffle': hyperparams.UniformBool( - default=True, - description='Whether to shuffle samples in each iteration. Only used when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'momentum': hyperparams.Bounded[float]( - default=0.9, - lower=0, - upper=1, - description='Momentum for gradient descent update. Should be between 0 and 1. Only used when solver=’sgd’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'nesterovs_momentum': hyperparams.UniformBool( - default=True, - description='Whether to use Nesterov’s momentum. Only used when solver=’sgd’ and momentum > 0.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'early_stopping': hyperparams.UniformBool( - default=False, - description='Whether to use early stopping to terminate training when validation score is not improving.If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'n_iter_no_change': hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='Maximum number of epochs to not meet tol improvement. Only effective when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'adam': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'learning_rate_init': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.001, - description='The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'shuffle': hyperparams.UniformBool( - default=True, - description='Whether to shuffle samples in each iteration. 
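The `solver` Choice above nests branch-specific settings (sgd's momentum, adam's betas) under the selected branch. The constructor further below flattens it with dict-style `.get()` calls, so keys missing from the chosen branch fall back to sklearn's defaults. A plain-dict sketch of that lookup pattern (judging by the constructor's usage, the real d3m Choice value supports the same mapping interface):

```python
# The chosen branch name sits under 'choice'; branch-specific keys are read
# with .get() so absent ones fall back to sklearn's own defaults.
solver = {"choice": "sgd", "learning_rate": "adaptive", "momentum": 0.8}

mlp_kwargs = dict(
    solver=solver["choice"],
    learning_rate=solver.get("learning_rate", "constant"),
    learning_rate_init=solver.get("learning_rate_init", 0.001),
    momentum=solver.get("momentum", 0.9),
    beta_1=solver.get("beta_1", 0.9),  # adam-only key; falls back for sgd
)
print(mlp_kwargs)
```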
Only used when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'early_stopping': hyperparams.UniformBool( - default=False, - description='Whether to use early stopping to terminate training when validation score is not improving.If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'beta_1': hyperparams.Bounded[float]( - default=0.9, - lower=0, - upper=1, - description='Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'beta_2': hyperparams.Bounded[float]( - default=0.999, - lower=0, - upper=1, - description='Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'epsilon': hyperparams.Bounded[float]( - default=1e-08, - lower=0, - upper=None, - description='Value for numerical stability in adam. Only used when solver=’adam’', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'n_iter_no_change': hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='Maximum number of epochs to not meet tol improvement. Only effective when solver=’sgd’ or ‘adam’.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ) - }, - default='adam', - description='The solver for weight optimization. - \'lbfgs\' is an optimizer in the family of quasi-Newton methods. - \'sgd\' refers to stochastic gradient descent. - \'adam\' refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba Note: The default solver \'adam\' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, \'lbfgs\' can converge faster and perform better.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - alpha = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.0001, - description='L2 penalty (regularization term) parameter.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - batch_size = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=16, - description='Size of minibatches for stochastic optimizers. If the solver is \'lbfgs\', the classifier will not use minibatch', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - description='When set to \'auto\', batch_size=min(200, n_samples)', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - description='Size of minibatches for stochastic optimizers. If the solver is \'lbfgs\', the classifier will not use minibatch. When set to "auto", `batch_size=min(200, n_samples)`', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=200, - description='Maximum number of iterations. 
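`batch_size` above is a Union of an explicit integer and the `'auto'` constant; per its own description (and sklearn's documented behavior), `'auto'` resolves to `min(200, n_samples)`, and the lbfgs solver ignores minibatching entirely. Note also that `max_iter` below is declared `Bounded[float]` even though sklearn treats it as an integer count of epochs. A sketch of the batch-size resolution (helper name is ours):

```python
# 'auto' resolves against the training-set size; explicit ints pass through.
def resolve_batch_size(batch_size, n_samples: int) -> int:
    if batch_size == "auto":
        return min(200, n_samples)
    return int(batch_size)

print(resolve_batch_size("auto", 150))   # 150: small dataset caps the batch
print(resolve_batch_size("auto", 5000))  # 200: the fixed ceiling
print(resolve_batch_size(16, 5000))      # 16: explicit value wins
```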
The solver iterates until convergence (determined by \'tol\') or this number of iterations. For stochastic solvers (\'sgd\', \'adam\'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to \'adaptive\', convergence is considered to be reached and training stops.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - validation_fraction = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. 
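The four `use_/exclude_` column hyperparameters above share one contract, implemented by d3m's `base_utils.get_columns_to_use`: an explicit use-list wins, the exclude-list applies only when no use-list is given, and a per-column predicate filters whatever remains. A rough standalone approximation of that contract (the real helper also returns the skipped columns and logs why they were skipped):

```python
# Approximation of base_utils.get_columns_to_use: an explicit use-list takes
# precedence; otherwise all columns minus the exclusions are candidates, and
# a can_use predicate makes the final cut.
def columns_to_use_sketch(n_columns, use_columns, exclude_columns, can_use):
    if use_columns:
        candidates = list(use_columns)
    else:
        excluded = set(exclude_columns)
        candidates = [i for i in range(n_columns) if i not in excluded]
    selected = [i for i in candidates if can_use(i)]
    skipped = [i for i in candidates if not can_use(i)]
    return selected, skipped

print(columns_to_use_sketch(5, (), (1, 3), lambda i: i != 4))  # ([0, 2], [4])
```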
Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKMLPRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn MLPRegressor - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.MULTILAYER_PERCEPTRON, ], - "name": "sklearn.neural_network.multilayer_perceptron.MLPRegressor", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.mlp.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html']}, - "version": "2019.11.13", - "id": "a4fedbf8-f69a-3440-9423-559291dfbd61", - "hyperparams_to_tune": ['hidden_layer_sizes', 'activation', 'solver', 'alpha'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: bool = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = MLPRegressor( - hidden_layer_sizes=self.hyperparams['hidden_layer_sizes'], - activation=self.hyperparams['activation'], - solver=self.hyperparams['solver']['choice'], - learning_rate=self.hyperparams['solver'].get('learning_rate', 'constant'), - learning_rate_init=self.hyperparams['solver'].get('learning_rate_init', 0.001), - power_t=self.hyperparams['solver'].get('power_t', 0.5), - shuffle=self.hyperparams['solver'].get('shuffle', True), - momentum=self.hyperparams['solver'].get('momentum', 0.9), - nesterovs_momentum=self.hyperparams['solver'].get('nesterovs_momentum', True), - early_stopping=self.hyperparams['solver'].get('early_stopping', False), - beta_1=self.hyperparams['solver'].get('beta_1', 0.9), - beta_2=self.hyperparams['solver'].get('beta_2', 0.999), - epsilon=self.hyperparams['solver'].get('epsilon', 
1e-08), - n_iter_no_change=self.hyperparams['solver'].get('n_iter_no_change', 10), - alpha=self.hyperparams['alpha'], - batch_size=self.hyperparams['batch_size'], - max_iter=self.hyperparams['max_iter'], - tol=self.hyperparams['tol'], - warm_start=self.hyperparams['warm_start'], - validation_fraction=self.hyperparams['validation_fraction'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - loss_=None, - coefs_=None, - intercepts_=None, - n_iter_=None, - n_layers_=None, - n_outputs_=None, - out_activation_=None, 
- _best_coefs=None, - _best_intercepts=None, - _no_improvement_count=None, - _random_state=None, - best_validation_score_=None, - loss_curve_=None, - t_=None, - _optimizer=None, - validation_scores_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - loss_=getattr(self._clf, 'loss_', None), - coefs_=getattr(self._clf, 'coefs_', None), - intercepts_=getattr(self._clf, 'intercepts_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - n_layers_=getattr(self._clf, 'n_layers_', None), - n_outputs_=getattr(self._clf, 'n_outputs_', None), - out_activation_=getattr(self._clf, 'out_activation_', None), - _best_coefs=getattr(self._clf, '_best_coefs', None), - _best_intercepts=getattr(self._clf, '_best_intercepts', None), - _no_improvement_count=getattr(self._clf, '_no_improvement_count', None), - _random_state=getattr(self._clf, '_random_state', None), - best_validation_score_=getattr(self._clf, 'best_validation_score_', None), - loss_curve_=getattr(self._clf, 'loss_curve_', None), - t_=getattr(self._clf, 't_', None), - _optimizer=getattr(self._clf, '_optimizer', None), - validation_scores_=getattr(self._clf, 'validation_scores_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.loss_ = params['loss_'] - self._clf.coefs_ = params['coefs_'] - self._clf.intercepts_ = params['intercepts_'] - self._clf.n_iter_ = params['n_iter_'] - self._clf.n_layers_ = params['n_layers_'] - self._clf.n_outputs_ = params['n_outputs_'] - self._clf.out_activation_ = params['out_activation_'] - self._clf._best_coefs = params['_best_coefs'] - self._clf._best_intercepts = params['_best_intercepts'] - self._clf._no_improvement_count = params['_no_improvement_count'] - self._clf._random_state = params['_random_state'] - self._clf.best_validation_score_ = params['best_validation_score_'] - self._clf.loss_curve_ = params['loss_curve_'] - self._clf.t_ = params['t_'] - self._clf._optimizer = params['_optimizer'] - self._clf.validation_scores_ = params['validation_scores_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['loss_'] is not None: - self._fitted = True - if params['coefs_'] is not None: - self._fitted = True - if params['intercepts_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - if params['n_layers_'] is not None: - self._fitted = True - if params['n_outputs_'] is not None: - self._fitted = True - if params['out_activation_'] is not None: - self._fitted = True - if params['_best_coefs'] is not None: - self._fitted = True - if params['_best_intercepts'] is not None: - self._fitted = True - if params['_no_improvement_count'] is not None: - self._fitted = True - if params['_random_state'] is not None: - self._fitted = True - if params['best_validation_score_'] is not None: - self._fitted = True - if params['loss_curve_'] is not None: - 
self._fitted = True - if params['t_'] is not None: - self._fitted = True - if params['_optimizer'] is not None: - self._fitted = True - if params['validation_scores_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update 
semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKMLPRegressor.__doc__ = MLPRegressor.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKMaxAbsScaler.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKMaxAbsScaler.py deleted file mode 100644 index 50eaf4d..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKMaxAbsScaler.py +++ /dev/null @@ -1,339 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.preprocessing.data import MaxAbsScaler - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - scale_: Optional[ndarray] 
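The next deleted file wraps `MaxAbsScaler`, whose fitted state (`scale_`, `max_abs_`, `n_samples_seen_`) is exactly what this `Params` class snapshots. For reference, what the wrapped estimator itself does: each feature is divided by its maximum absolute value, which keeps zeros at zero and so preserves sparsity. A quick demonstration:

```python
# MaxAbsScaler divides each column by its max absolute value, mapping data
# into [-1, 1] without shifting it (sparsity-friendly).
import numpy as np
from sklearn.preprocessing import MaxAbsScaler  # public import path

X = np.array([[ 1.0, -2.0],
              [ 2.0,  4.0],
              [-4.0,  1.0]])
scaler = MaxAbsScaler().fit(X)
print(scaler.scale_)        # per-column max |x|: [4. 4.]
print(scaler.transform(X))  # e.g. first row -> [ 0.25 -0.5 ]
```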
- max_abs_: Optional[ndarray] - n_samples_seen_: Optional[int] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
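`error_on_no_input` above governs a guard that recurs in every `fit()`/`produce()` in these wrappers: zero selected columns either raises (sklearn-like strictness) or degrades to a warning. Note that the originals call `self.logger.warn`, an alias the `logging` module has deprecated in favor of `warning`. A condensed sketch of the guard (function name is ours):

```python
# The no-columns guard shared by fit() and produce(): raise when configured
# to mimic sklearn, otherwise warn and let the pipeline continue.
import logging

logger = logging.getLogger("sklearn_wrap_sketch")

def guard_selected_columns(training_indices, error_on_no_input: bool) -> bool:
    if len(training_indices) > 0:
        return True
    if error_on_no_input:
        raise RuntimeError("No input columns were selected")
    logger.warning("No input columns were selected")  # warning, not warn
    return False
```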
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKMaxAbsScaler(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn MaxAbsScaler - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.FEATURE_SCALING, ], - "name": "sklearn.preprocessing.data.MaxAbsScaler", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.max_abs_scaler.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MaxAbsScaler.html']}, - "version": "2019.11.13", - "id": "64d2ef5d-b221-3033-8342-76d0293fa99c", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = MaxAbsScaler( - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if 
len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - scale_=None, - max_abs_=None, - n_samples_seen_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - scale_=getattr(self._clf, 'scale_', None), - max_abs_=getattr(self._clf, 'max_abs_', None), - n_samples_seen_=getattr(self._clf, 'n_samples_seen_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.scale_ = params['scale_'] - self._clf.max_abs_ = params['max_abs_'] - self._clf.n_samples_seen_ = params['n_samples_seen_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['scale_'] is not None: - self._fitted = True - if params['max_abs_'] is not None: - self._fitted = True - if params['n_samples_seen_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return 
True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKMaxAbsScaler.__doc__ = MaxAbsScaler.__doc__ \ 
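One genuine bug in `SKMaxAbsScaler` above (and repeated in `SKMinMaxScaler` below): `_get_target_columns_metadata` initializes `add_semantic_types = []` and then calls `.add(...)`, a set method that lists do not have, so the method would raise `AttributeError` if invoked. It is latent here because `_wrap_predictions` routes through `_copy_inputs_metadata`, which correctly uses a set. The corrected step, as a standalone function:

```python
# Corrected semantic-type update for _get_target_columns_metadata: build the
# additions as a set so .add() is valid (the original used a list).
def updated_semantic_types(semantic_types, return_semantic_type):
    semantic_types_to_remove = set()
    add_semantic_types = set()                  # was: [] in the original
    add_semantic_types.add(return_semantic_type)
    merged = (set(semantic_types) - semantic_types_to_remove) | add_semantic_types
    return list(merged)

print(updated_semantic_types(
    [], "https://metadata.datadrivendiscovery.org/types/Attribute"))
```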
No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKMinMaxScaler.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKMinMaxScaler.py deleted file mode 100644 index dc8fc78..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKMinMaxScaler.py +++ /dev/null @@ -1,366 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.preprocessing.data import MinMaxScaler - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - min_: Optional[ndarray] - scale_: Optional[ndarray] - data_min_: Optional[ndarray] - data_max_: Optional[ndarray] - data_range_: Optional[ndarray] - n_samples_seen_: Optional[int] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - feature_range = hyperparams.SortedSet( - elements=hyperparams.Hyperparameter[int](0), - default=(0, 1), - min_size=2, - max_size=2, - description='Desired range of transformed data.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. 
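For reference on the estimator this file wraps: `MinMaxScaler` maps each column affinely onto `feature_range`, using the fitted `data_min_`/`data_max_` captured in the `Params` class above. A short demonstration:

```python
# MinMaxScaler: X_std = (X - data_min_) / (data_max_ - data_min_), then
# X_scaled = X_std * (hi - lo) + lo for feature_range = (lo, hi).
import numpy as np
from sklearn.preprocessing import MinMaxScaler  # public import path

X = np.array([[1.0], [3.0], [5.0]])
scaler = MinMaxScaler(feature_range=(0, 1)).fit(X)
print(scaler.data_min_, scaler.data_max_)  # [1.] [5.]
print(scaler.transform(X).ravel())         # [0.  0.5 1. ]
```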
Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKMinMaxScaler(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn MinMaxScaler - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.FEATURE_SCALING, ], - "name": "sklearn.preprocessing.data.MinMaxScaler", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.min_max_scaler.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html']}, - "version": "2019.11.13", - "id": "08d0579d-38da-307b-8b75-6a213ef2972e", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = MinMaxScaler( - feature_range=self.hyperparams['feature_range'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise 
PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - min_=None, - scale_=None, - data_min_=None, - data_max_=None, - data_range_=None, - n_samples_seen_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - min_=getattr(self._clf, 'min_', None), - scale_=getattr(self._clf, 'scale_', None), - data_min_=getattr(self._clf, 'data_min_', None), - data_max_=getattr(self._clf, 'data_max_', None), - data_range_=getattr(self._clf, 'data_range_', None), - n_samples_seen_=getattr(self._clf, 'n_samples_seen_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.min_ = params['min_'] - self._clf.scale_ = params['scale_'] - self._clf.data_min_ = params['data_min_'] - self._clf.data_max_ = params['data_max_'] - self._clf.data_range_ = params['data_range_'] - self._clf.n_samples_seen_ = params['n_samples_seen_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['min_'] is not None: - self._fitted = True - if params['scale_'] is not None: - self._fitted = True - if params['data_min_'] is not None: - self._fitted = True - if params['data_max_'] is not None: - self._fitted = True - if params['data_range_'] is not None: - self._fitted = True - if params['n_samples_seen_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], 
columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - 
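The `_can_produce_column` gate above combines two checks: the column's structural type must be numeric, and every required semantic type must be present, tested via an empty set difference. A condensed runnable version (a plain dict stands in for the d3m column-metadata query result):

```python
# Column admission test: numeric structural type plus the Attribute semantic
# type; "all required types present" is an empty set difference.
import numpy

ATTRIBUTE = "https://metadata.datadrivendiscovery.org/types/Attribute"
ACCEPTED_STRUCTURAL_TYPES = (int, float, numpy.integer, numpy.float64)

def can_produce_column(column_metadata: dict) -> bool:
    if not issubclass(column_metadata["structural_type"], ACCEPTED_STRUCTURAL_TYPES):
        return False
    semantic_types = set(column_metadata.get("semantic_types", []))
    if not semantic_types:
        return False  # the wrappers also log a warning here
    return len({ATTRIBUTE} - semantic_types) == 0

print(can_produce_column({"structural_type": float, "semantic_types": [ATTRIBUTE]}))  # True
print(can_produce_column({"structural_type": str,   "semantic_types": [ATTRIBUTE]}))  # False
```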
add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKMinMaxScaler.__doc__ = MinMaxScaler.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKMissingIndicator.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKMissingIndicator.py deleted file mode 100644 index 929389f..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKMissingIndicator.py +++ /dev/null @@ -1,373 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.impute import MissingIndicator -from sklearn.impute._base import _get_mask - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - features_: Optional[ndarray] - _n_features: Optional[int] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - missing_values = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Hyperparameter[int]( - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'np.nan': hyperparams.Hyperparameter[float]( - default=numpy.nan, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='np.nan', - description='The placeholder for the missing values. All occurrences of `missing_values` will be indicated (True in the output array), the other values will be marked as False.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - features = hyperparams.Enumeration[str]( - values=['missing-only', 'all'], - default='missing-only', - description='Whether the imputer mask should represent all or a subset of features. - If "missing-only" (default), the imputer mask will only represent features containing missing values during fit time. 
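The wrapped `MissingIndicator` produces a boolean mask of missing entries rather than imputing them; with the default `features='missing-only'` the mask covers only the columns that actually contained missing values at fit time, recorded in `features_` (snapshotted by the `Params` class above). A quick demonstration:

```python
# MissingIndicator marks where values are missing; 'missing-only' restricts
# the mask to columns that had NaNs during fit.
import numpy as np
from sklearn.impute import MissingIndicator

X = np.array([[1.0, np.nan],
              [2.0, 3.0]])
indicator = MissingIndicator(features="missing-only")
print(indicator.fit_transform(X))  # [[ True] [False]] -- only column 1 tracked
print(indicator.features_)         # [1]
```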
- If "all", the imputer mask will represent all features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - error_on_new = hyperparams.UniformBool( - default=True, - description='If True (default), transform will raise an error when there are features with missing values in transform that have no missing values in fit. This is applicable only when ``features="missing-only"``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKMissingIndicator(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn MissingIndicator - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.IMPUTATION, ], - "name": "sklearn.impute.MissingIndicator", - "primitive_family": metadata_base.PrimitiveFamily.DATA_CLEANING, - "python_path": "d3m.primitives.data_cleaning.missing_indicator.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.impute.MissingIndicator.html']}, - "version": "2019.11.13", - "id": "94c5c918-9ad5-3496-8e52-2359056e0120", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = MissingIndicator( - missing_values=self.hyperparams['missing_values'], - features=self.hyperparams['features'], - error_on_new=self.hyperparams['error_on_new'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices, _ = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use, _ = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.transform(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - if 
sparse.issparse(sk_output): - sk_output = sk_output.toarray() - target_columns_metadata = self._copy_columns_metadata(inputs.metadata, self._training_indices, self.hyperparams) - output = self._wrap_predictions(inputs, sk_output, target_columns_metadata) - - output.columns = [inputs.columns[idx] for idx in range(len(inputs.columns)) if idx in self._training_indices] - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - _, _, dropped_cols = self._get_columns_to_fit(inputs, self.hyperparams) - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices + dropped_cols, - columns_list=output) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - features_=None, - _n_features=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - features_=getattr(self._clf, 'features_', None), - _n_features=getattr(self._clf, '_n_features', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.features_ = params['features_'] - self._clf._n_features = params['_n_features'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['features_'] is not None: - self._fitted = True - if params['_n_features'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - - if not hyperparams['use_semantic_types']: - columns_to_produce = list(range(len(inputs.columns))) - - else: - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - - columns_to_drop = cls._get_columns_to_drop(inputs, columns_to_produce, hyperparams) - for col in columns_to_drop: - columns_to_produce.remove(col) - - return inputs.iloc[:, columns_to_produce], columns_to_produce, columns_to_drop - - @classmethod - def _get_columns_to_drop(cls, inputs: Inputs, column_indices: List[int], hyperparams: Hyperparams): - """ - Find columns that contain no missing_values. - When "features" is "missing-only", such columns are dropped from the indicator output. - :param inputs: - :param column_indices: - :return: - """ - columns_to_remove = [] - if hyperparams['features'] == "missing-only": - for _, col in enumerate(column_indices): - inp = inputs.iloc[:, [col]].values - mask = _get_mask(inp,
hyperparams['missing_values']) - if not mask.any(): - columns_to_remove.append(col) - return columns_to_remove - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray, target_columns_metadata) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - - @classmethod - def _copy_columns_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_indices, hyperparams) -> List[OrderedDict]: - outputs_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in column_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types)
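# The set bookkeeping above is the whole trick; a minimal sketch with plain Python
# sets and a hypothetical column, assuming the "ConstructedAttribute" option of this
# primitive's return_semantic_type enumeration is chosen:
#
#   old = {"https://metadata.datadrivendiscovery.org/types/Attribute"}
#   chosen = "https://metadata.datadrivendiscovery.org/types/ConstructedAttribute"
#   new = (old - set()) | {chosen}
#   # new holds both URIs; nothing is stripped, since semantic_types_to_remove is empty here.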
- column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKMissingIndicator.__doc__ = MissingIndicator.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKMultinomialNB.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKMultinomialNB.py deleted file mode 100644 index b429050..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKMultinomialNB.py +++ /dev/null @@ -1,488 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.naive_bayes import MultinomialNB - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - class_log_prior_: Optional[ndarray] - feature_log_prob_: Optional[ndarray] - class_count_: Optional[ndarray] - feature_count_: Optional[ndarray] - classes_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - alpha = hyperparams.Hyperparameter[float]( - default=1, - description='Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_prior = hyperparams.UniformBool( - default=True, - description='Whether to learn class prior probabilities or not. If false, a uniform prior will be used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. 
Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKMultinomialNB(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams], - ContinueFitMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn MultinomialNB - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.NAIVE_BAYES_CLASSIFIER, ], - "name": "sklearn.naive_bayes.MultinomialNB", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.multinomial_naive_bayes.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.MultinomialNB.html']}, - "version": "2019.11.13", - "id": "adf13b4b-9fe5-38a2-a1ea-d1b1cc342576", - "hyperparams_to_tune": ['alpha', 'fit_prior'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] 
- }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = MultinomialNB( - alpha=self.hyperparams['alpha'], - fit_prior=self.hyperparams['fit_prior'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def continue_fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._training_inputs is None or self._training_outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.partial_fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives 
that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - class_log_prior_=None, - feature_log_prob_=None, - class_count_=None, - feature_count_=None, - classes_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - class_log_prior_=getattr(self._clf, 'class_log_prior_', None), - feature_log_prob_=getattr(self._clf, 'feature_log_prob_', None), - class_count_=getattr(self._clf, 'class_count_', None), - feature_count_=getattr(self._clf, 'feature_count_', None), - classes_=getattr(self._clf, 'classes_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.class_log_prior_ = params['class_log_prior_'] - self._clf.feature_log_prob_ = params['feature_log_prob_'] - self._clf.class_count_ = params['class_count_'] - self._clf.feature_count_ = params['feature_count_'] - self._clf.classes_ = params['classes_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['class_log_prior_'] is not None: - self._fitted = True - if params['feature_log_prob_'] is not None: - self._fitted = True - if params['class_count_'] is not None: - self._fitted = True - if params['feature_count_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. 
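# A sketch of the fancy-indexed lookup performed in the loop below, assuming a
# single (hypothetical) target column whose classes are ['a', 'b']:
#
#   log_proba[0].shape == (n_samples, 2) and classes_map == {'a': 0, 'b': 1}
#   log_proba[0][numpy.arange(n_samples), mapped_outputs_column]
#   # picks, row by row, the log-probability of the class actually recorded in "outputs".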
- if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. - log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - 
semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKMultinomialNB.__doc__ = MultinomialNB.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKNearestCentroid.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKNearestCentroid.py deleted file mode 100644 index 
62bc158..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKNearestCentroid.py +++ /dev/null @@ -1,408 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.neighbors.nearest_centroid import NearestCentroid - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - centroids_: Optional[ndarray] - classes_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - metric = hyperparams.Enumeration[str]( - default='euclidean', - values=['euclidean', 'manhattan'], - description='The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by metrics.pairwise.pairwise_distances for its metric parameter. The centroids for the samples corresponding to each class is the point from which the sum of the distances (according to the metric) of all samples that belong to that particular class are minimized. If the "manhattan" metric is provided, this centroid is the median and for all other metrics, the centroid is now set to be the mean.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - shrink_threshold = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Threshold for shrinking centroids to remove features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. 
If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKNearestCentroid(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn NearestCentroid - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.NEAREST_CENTROID_CLASSIFIER, ], - "name": "sklearn.neighbors.nearest_centroid.NearestCentroid", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.nearest_centroid.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestCentroid.html']}, - "version": "2019.11.13", - "id": "90e7b335-5af0-35ad-932c-9c771fe84693", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = NearestCentroid( - metric=self.hyperparams['metric'], - shrink_threshold=self.hyperparams['shrink_threshold'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - 
sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - centroids_=None, - classes_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - centroids_=getattr(self._clf, 'centroids_', None), - classes_=getattr(self._clf, 'classes_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.centroids_ = params['centroids_'] - self._clf.classes_ = params['classes_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['centroids_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: 
metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
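# The next few lines amount to this set rewrite (URIs abbreviated to their last
# path component; the starting types are hypothetical):
#
#   before = {"Attribute", "TrueTarget", "SuggestedTarget"}
#   after = (before - {"TrueTarget", "SuggestedTarget"}) | {"PredictedTarget"}
#   # plus return_semantic_type, which defaults to "PredictedTarget" as well:
#   # training-target markers are stripped and prediction markers attached.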
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKNearestCentroid.__doc__ = NearestCentroid.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKNormalizer.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKNormalizer.py deleted file mode 100644 index b358b7c..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKNormalizer.py +++ /dev/null @@ -1,329 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.preprocessing.data import Normalizer - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - 
training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - norm = hyperparams.Enumeration[str]( - default='l2', - values=['l1', 'l2', 'max'], - description='The norm to use to normalize each non zero sample.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKNormalizer(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn Normalizer - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.DATA_NORMALIZATION, ], - "name": "sklearn.preprocessing.data.Normalizer", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.normalizer.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html']}, - "version": "2019.11.13", - "id": "980b3a2d-1574-31f3-8326-ddc62f8fc2c3", - "hyperparams_to_tune": ['norm'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = Normalizer( - norm=self.hyperparams['norm'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = 
self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and 
prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKNormalizer.__doc__ = Normalizer.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKNystroem.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKNystroem.py deleted file mode 100644 index b92c92f..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKNystroem.py +++ /dev/null @@ -1,522 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import
sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.kernel_approximation import Nystroem - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - components_: Optional[ndarray] - component_indices_: Optional[ndarray] - normalization_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - kernel = hyperparams.Choice( - choices={ - 'rbf': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=0.1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'laplacian': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=0.1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'polynomial': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=0.1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'coef0': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'degree': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - 
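# Each kernel-specific entry above follows the same pattern: a hyperparams.Union over a
# numeric Constant and a None Constant, defaulting to 'none' so that sklearn falls back
# to its own per-kernel default. A minimal sketch of the effect on the wrapped estimator
# (a toy 20x4 input is assumed; Nystroem and numpy are the only APIs used):
#
#   >>> import numpy as np
#   >>> from sklearn.kernel_approximation import Nystroem
#   >>> X = np.random.RandomState(0).rand(20, 4)
#   >>> Nystroem(kernel='polynomial', gamma=None, degree=3,
#   ...          n_components=10, random_state=0).fit_transform(X).shape
#   (20, 10)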
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'exponential': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=0.1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'chi2': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=0.1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'sigmoid': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=0.1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'coef0': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Constant( - default=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'cosine': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'poly': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'linear': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'additive_chi2': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ) - }, - default='rbf', - description='Kernel map to be approximated. A callable should accept two arguments and the keyword arguments passed to this object as kernel_params, and should return a floating point number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_components = hyperparams.Bounded[int]( - default=100, - lower=0, - upper=None, - description='Number of features to construct. How many data points will be used to construct the mapping.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. 
If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKNystroem(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn Nystroem - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.KERNEL_METHOD, ], - "name": "sklearn.kernel_approximation.Nystroem", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.nystroem.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.kernel_approximation.Nystroem.html']}, - "version": "2019.11.13", - "id": "ca3a4357-a49f-31f0-82ed-244b66e29426", - "hyperparams_to_tune": ['kernel'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, 
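# The kernel Choice defined above is consumed below like a nested mapping: ['choice']
# names the selected kernel and .get() pulls kernel-specific sub-hyperparams, falling
# back when the selected kernel does not define them. A minimal sketch of the lookup
# pattern, with a plain dict standing in for the d3m Choice value (an assumption made
# for illustration, not the full d3m API):
#
#   >>> kernel = {'choice': 'rbf', 'gamma': 0.5}
#   >>> kernel['choice']
#   'rbf'
#   >>> kernel.get('degree', None) is None
#   True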
docker_containers=docker_containers) - - # False - self._clf = Nystroem( - kernel=self.hyperparams['kernel']['choice'], - degree=self.hyperparams['kernel'].get('degree', 'none'), - gamma=self.hyperparams['kernel'].get('gamma', 'none'), - coef0=self.hyperparams['kernel'].get('coef0', 'none'), - n_components=self.hyperparams['n_components'], - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - components_=None, - component_indices_=None, - normalization_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - components_=getattr(self._clf, 'components_', None), - component_indices_=getattr(self._clf, 'component_indices_', None), - normalization_=getattr(self._clf, 'normalization_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.components_ = params['components_'] - self._clf.component_indices_ = 
params['component_indices_'] - self._clf.normalization_ = params['normalization_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['components_'] is not None: - self._fitted = True - if params['component_indices_'] is not None: - self._fitted = True - if params['normalization_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
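# The update below is plain set arithmetic: start from the column's existing semantic
# types, subtract any scheduled removals, and union in the configured
# return_semantic_type. A minimal sketch with literal type URIs (the starting set is
# assumed for illustration):
#
#   >>> types = {'https://metadata.datadrivendiscovery.org/types/Attribute'}
#   >>> types -= set()
#   >>> types |= {'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'}
#   >>> len(types)
#   2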
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKNystroem.__doc__ = Nystroem.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKOneHotEncoder.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKOneHotEncoder.py deleted file mode 100644 index 536c585..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKOneHotEncoder.py +++ /dev/null @@ -1,420 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import
sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.preprocessing.data import OneHotEncoder -from numpy import float as npfloat - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - _active_features_: Optional[ndarray] - _categorical_features: Optional[Union[str, ndarray]] - _categories: Optional[Sequence[Any]] - _feature_indices_: Optional[ndarray] - _legacy_mode: Optional[bool] - _n_values_: Optional[ndarray] - _n_values: Optional[Union[str, ndarray]] - categories_: Optional[Sequence[Any]] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_values = hyperparams.Union( - configuration=OrderedDict({ - 'auto': hyperparams.Constant( - default='auto', - description='Determine value range from training data.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=10, - description='Number of categorical values per feature. Each feature value should be in range(n_values).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'list': hyperparams.List( - default=[], - elements=hyperparams.Hyperparameter[int](1), - description='n_values[i] is the number of categorical values in X[:, i]. Each feature value should be in range(n_values[i]).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - description='Number of values per feature. - \'auto\' : determine value range from training data. - int : number of categorical values per feature. Each feature value should be in ``range(n_values)`` - array : ``n_values[i]`` is the number of categorical values in ``X[:, i]``. Each feature value should be in ``range(n_values[i])``', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - sparse = hyperparams.UniformBool( - default=True, - description='Will return sparse matrix if set True else will return an array.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - handle_unknown = hyperparams.Enumeration[str]( - values=['error', 'ignore'], - default='error', - description='Whether to raise an error or ignore if a unknown categorical feature is present during transform.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - categories = hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. 
If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - encode_target_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should it encode also target columns?", - ) - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKOneHotEncoder(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn OneHotEncoder - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.ENCODE_ONE_HOT, ], - "name": "sklearn.preprocessing.data.OneHotEncoder", - "primitive_family": metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, - "python_path": "d3m.primitives.data_transformation.one_hot_encoder.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html']}, - "version": "2019.11.13", - "id": "c977e879-1bf5-3829-b5b0-39b00233aff5", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: 
Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = OneHotEncoder( - n_values=self.hyperparams['n_values'], - sparse=self.hyperparams['sparse'], - handle_unknown=self.hyperparams['handle_unknown'], - categories=self.hyperparams['categories'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - _active_features_=None, - _categorical_features=None, - _categories=None, - _feature_indices_=None, - _legacy_mode=None, - _n_values_=None, - _n_values=None, - categories_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - _active_features_=getattr(self._clf, '_active_features_', None), - _categorical_features=getattr(self._clf, '_categorical_features', None), - _categories=getattr(self._clf, '_categories', None), - _feature_indices_=getattr(self._clf, '_feature_indices_', None), - _legacy_mode=getattr(self._clf, '_legacy_mode', None), - _n_values_=getattr(self._clf, '_n_values_', None), - _n_values=getattr(self._clf, '_n_values', None), - 
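# getattr with a None default keeps Params construction safe even when a given fitted
# attribute is missing, e.g. before fit() or on sklearn releases that do not set
# private attributes such as _legacy_mode. A minimal sketch (real sklearn API, no
# fitting performed):
#
#   >>> from sklearn.preprocessing import OneHotEncoder
#   >>> getattr(OneHotEncoder(), 'categories_', None) is None
#   True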
categories_=getattr(self._clf, 'categories_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf._active_features_ = params['_active_features_'] - self._clf._categorical_features = params['_categorical_features'] - self._clf._categories = params['_categories'] - self._clf._feature_indices_ = params['_feature_indices_'] - self._clf._legacy_mode = params['_legacy_mode'] - self._clf._n_values_ = params['_n_values_'] - self._clf._n_values = params['_n_values'] - self._clf.categories_ = params['categories_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['_active_features_'] is not None: - self._fitted = True - if params['_categorical_features'] is not None: - self._fitted = True - if params['_categories'] is not None: - self._fitted = True - if params['_feature_indices_'] is not None: - self._fitted = True - if params['_legacy_mode'] is not None: - self._fitted = True - if params['_n_values_'] is not None: - self._fitted = True - if params['_n_values'] is not None: - self._fitted = True - if params['categories_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int,float,numpy.integer,numpy.float64,str,) - accepted_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/CategoricalData","https://metadata.datadrivendiscovery.org/types/Attribute",]) - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - if hyperparams['encode_target_columns'] and 'https://metadata.datadrivendiscovery.org/types/Target' in semantic_types: - return True - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - 
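# One-hot encoding usually widens the frame: each categorical column becomes one
# indicator column per observed category, so there can be more output columns than
# selected input columns (this is why _copy_inputs_metadata below pads extra columns
# with "output_{}" names). A minimal sketch (public sklearn API; sparse=False assumes
# the pre-1.2 sklearn releases this 2019-era wrapper targets):
#
#   >>> import numpy as np
#   >>> from sklearn.preprocessing import OneHotEncoder
#   >>> OneHotEncoder(sparse=False).fit_transform(np.array([['a'], ['b'], ['a']])).shape
#   (3, 2)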
outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKOneHotEncoder.__doc__ = OneHotEncoder.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKOrdinalEncoder.py
b/common-primitives/sklearn-wrap/sklearn_wrap/SKOrdinalEncoder.py deleted file mode 100644 index 7396073..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKOrdinalEncoder.py +++ /dev/null @@ -1,343 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.preprocessing._encoders import OrdinalEncoder - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - categories_: Optional[Optional[Sequence[Any]]] - _categories: Optional[str] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - categories = hyperparams.Constant( - default='auto', - description='Categories (unique values) per feature: - \'auto\' : Determine categories automatically from the training data. - list : ``categories[i]`` holds the categories expected in the ith column. The passed categories should not mix strings and numeric values, and should be sorted in case of numeric values. The used categories can be found in the ``categories_`` attribute.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. 
Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKOrdinalEncoder(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn OrdinalEncoder - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.CATEGORY_ENCODER, ], - "name": "sklearn.preprocessing._encoders.OrdinalEncoder", - "primitive_family": metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, - "python_path": "d3m.primitives.data_transformation.ordinal_encoder.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OrdinalEncoder.html']}, - "version": "2019.11.13", - "id": "a048aaa7-4475-3834-b739-de3105ec7217", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = OrdinalEncoder( - categories=self.hyperparams['categories'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if 
self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - categories_=None, - _categories=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - categories_=getattr(self._clf, 'categories_', None), - _categories=getattr(self._clf, '_categories', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.categories_ = params['categories_'] - self._clf._categories = params['_categories'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['categories_'] is not None: - self._fitted = True - if params['_categories'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = 
(int,float,numpy.integer,numpy.float64,str,) - accepted_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/CategoricalData",]) - not_accepted_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/Target",]) - - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - if len(not_accepted_semantic_types.intersection(semantic_types)) > 0: - return False - - # Making sure at least one of the accepted_semantic_types is present in semantic_types - if len(accepted_semantic_types.intersection(semantic_types)) > 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types =
semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKOrdinalEncoder.__doc__ = OrdinalEncoder.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKPCA.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKPCA.py deleted file mode 100644 index a8c7973..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKPCA.py +++ /dev/null @@ -1,468 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.decomposition.pca import PCA -import sys - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - components_: Optional[ndarray] - explained_variance_: Optional[ndarray] - explained_variance_ratio_: Optional[ndarray] - mean_: Optional[ndarray] - n_components_: Optional[int] - noise_variance_: Optional[float] - n_features_: Optional[int] - n_samples_: Optional[int] - singular_values_: Optional[ndarray] - _fit_svd_solver: Optional[str] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_components = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - description='Number of components to keep.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'float': hyperparams.Uniform( - lower=0, - upper=1, - default=0.5, - description='Selects the number of components such that the amount of variance that needs to be explained is greater than the percentage specified by n_components.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'mle': hyperparams.Constant( - default='mle', - description='If svd_solver == \'full\', Minka\'s MLE is used to guess the dimension.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - description='All components are kept, n_components == min(n_samples, n_features).', - 
semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Number of components to keep. if n_components is not set all components are kept:: n_components == min(n_samples, n_features) if n_components == \'mle\' and svd_solver == \'full\', Minka\'s MLE is used to guess the dimension if ``0 < n_components < 1`` and svd_solver == \'full\', select the number of components such that the amount of variance that needs to be explained is greater than the percentage specified by n_components n_components cannot be equal to n_features for svd_solver == \'arpack\'.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - whiten = hyperparams.UniformBool( - default=False, - description='When True (False by default) the `components_` vectors are multiplied by the square root of n_samples and then divided by the singular values to ensure uncorrelated outputs with unit component-wise variances. Whitening will remove some information from the transformed signal (the relative variance scales of the components) but can sometime improve the predictive accuracy of the downstream estimators by making their data respect some hard-wired assumptions.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - svd_solver = hyperparams.Choice( - choices={ - 'auto': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'full': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'arpack': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'tol': hyperparams.Bounded[float]( - default=0.0, - lower=0.0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'randomized': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'iterated_power': hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ) - }, - default='auto', - description='auto : the solver is selected by a default policy based on `X.shape` and `n_components`: if the input data is larger than 500x500 and the number of components to extract is lower than 80% of the smallest dimension of the data, then the more efficient \'randomized\' method is enabled. Otherwise the exact full SVD is computed and optionally truncated afterwards. full : run exact full SVD calling the standard LAPACK solver via `scipy.linalg.svd` and select the components by postprocessing arpack : run SVD truncated to n_components calling ARPACK solver via `scipy.sparse.linalg.svds`. It requires strictly 0 < n_components < X.shape[1] randomized : run randomized SVD by the method of Halko et al. .. versionadded:: 0.18.0', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. 
If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKPCA(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn PCA - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.PRINCIPAL_COMPONENT_ANALYSIS, ], - "name": "sklearn.decomposition.pca.PCA", - "primitive_family": metadata_base.PrimitiveFamily.FEATURE_EXTRACTION, - "python_path": "d3m.primitives.feature_extraction.pca.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html']}, - "version": "2019.11.13", - "id": "2fb28cd1-5de6-3663-a2dc-09c786fba7f4", - "hyperparams_to_tune": ['n_components', 'svd_solver'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, 
docker_containers=docker_containers) - - # False - self._clf = PCA( - n_components=self.hyperparams['n_components'], - whiten=self.hyperparams['whiten'], - svd_solver=self.hyperparams['svd_solver']['choice'], - tol=self.hyperparams['svd_solver'].get('tol', 0.0), - iterated_power=self.hyperparams['svd_solver'].get('iterated_power', 'auto'), - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - components_=None, - explained_variance_=None, - explained_variance_ratio_=None, - mean_=None, - n_components_=None, - noise_variance_=None, - n_features_=None, - n_samples_=None, - singular_values_=None, - _fit_svd_solver=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - components_=getattr(self._clf, 'components_', None), - explained_variance_=getattr(self._clf, 'explained_variance_', None), - explained_variance_ratio_=getattr(self._clf, 'explained_variance_ratio_', None), - mean_=getattr(self._clf, 'mean_', None), - n_components_=getattr(self._clf, 'n_components_', None), - noise_variance_=getattr(self._clf, 'noise_variance_', None), - n_features_=getattr(self._clf, 'n_features_', None), 
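The constructor above forwards the `n_components` Union straight to sklearn, so each of its four branches (int, float in (0, 1), 'mle', None) is a legal sklearn value. A small sketch under that assumption, using the current `sklearn.decomposition.PCA` import path rather than the deprecated `sklearn.decomposition.pca` module used in this file:

```python
import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(100, 10)

# int / float in (0, 1) / 'mle' / None -- the four Union branches above.
# 'mle' and fractional values require svd_solver='full'.
for n_components in (2, 0.9, "mle", None):
    pca = PCA(n_components=n_components, svd_solver="full", random_state=0).fit(X)
    print(repr(n_components), "->", pca.n_components_)
```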
- n_samples_=getattr(self._clf, 'n_samples_', None), - singular_values_=getattr(self._clf, 'singular_values_', None), - _fit_svd_solver=getattr(self._clf, '_fit_svd_solver', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.components_ = params['components_'] - self._clf.explained_variance_ = params['explained_variance_'] - self._clf.explained_variance_ratio_ = params['explained_variance_ratio_'] - self._clf.mean_ = params['mean_'] - self._clf.n_components_ = params['n_components_'] - self._clf.noise_variance_ = params['noise_variance_'] - self._clf.n_features_ = params['n_features_'] - self._clf.n_samples_ = params['n_samples_'] - self._clf.singular_values_ = params['singular_values_'] - self._clf._fit_svd_solver = params['_fit_svd_solver'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['components_'] is not None: - self._fitted = True - if params['explained_variance_'] is not None: - self._fitted = True - if params['explained_variance_ratio_'] is not None: - self._fitted = True - if params['mean_'] is not None: - self._fitted = True - if params['n_components_'] is not None: - self._fitted = True - if params['noise_variance_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['n_samples_'] is not None: - self._fitted = True - if params['singular_values_'] is not None: - self._fitted = True - if params['_fit_svd_solver'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def 
_get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKPCA.__doc__ = PCA.__doc__ \ No newline at end of file diff --git 
a/common-primitives/sklearn-wrap/sklearn_wrap/SKPassiveAggressiveClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKPassiveAggressiveClassifier.py deleted file mode 100644 index 9a4cfa9..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKPassiveAggressiveClassifier.py +++ /dev/null @@ -1,648 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.passive_aggressive import PassiveAggressiveClassifier - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[ndarray] - classes_: Optional[ndarray] - _expanded_class_weight: Optional[ndarray] - alpha: Optional[float] - epsilon: Optional[float] - eta0: Optional[float] - l1_ratio: Optional[float] - learning_rate: Optional[str] - loss_function_: Optional[object] - n_iter_: Optional[int] - penalty: Optional[str] - power_t: Optional[float] - t_: Optional[float] - average_coef_: Optional[ndarray] - average_intercept_: Optional[ndarray] - standard_coef_: Optional[ndarray] - standard_intercept_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - C = hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=False, - description='Whether the intercept should be estimated or not. 
If False, the data is assumed to be already centered.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=1000, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='int', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - shuffle = hyperparams.UniformBool( - default=True, - description='Whether or not the training data should be shuffled after each epoch.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - default=0.001, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='float', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. -1 means \'all CPUs\'. Defaults to 1.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - loss = hyperparams.Enumeration[str]( - values=['hinge', 'squared_hinge'], - default='hinge', - description='The loss function to be used: hinge: equivalent to PA-I in the reference paper. squared_hinge: equivalent to PA-II in the reference paper.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - class_weight = hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Constant( - default='balanced', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Preset for the class_weight fit parameter. Weights associated with classes. If not given, all classes are supposed to have weight one. The "balanced" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` .. 
versionadded:: 0.17 parameter *class_weight* to automatically weight samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - average = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=2, - upper=None, - default=10, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'bool': hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='bool', - description='When set to True, computes the averaged SGD weights and stores the result in the coef_ attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches average. So average=10 will begin averaging after seeing 10 samples. New in version 0.19: parameter average to use weights averaging in SGD', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - early_stopping = hyperparams.UniformBool( - default=False, - description='Whether to use early stopping to terminate training when validation score is not improving. If set to True, it will automatically set aside a fraction of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - validation_fraction = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=1, - description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_iter_no_change = hyperparams.Bounded[int]( - default=5, - lower=0, - upper=None, - description='Number of iterations with no improvement to wait before early stopping.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target.
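The early_stopping, validation_fraction, and n_iter_no_change hyperparams above map one-to-one onto sklearn's PassiveAggressiveClassifier arguments. A runnable sketch of their combined effect on toy data (values are the defaults described above; the dataset is synthetic and illustrative only):

```python
from sklearn.datasets import make_classification
from sklearn.linear_model import PassiveAggressiveClassifier

X, y = make_classification(n_samples=500, random_state=0)
clf = PassiveAggressiveClassifier(
    C=1.0, max_iter=1000, tol=1e-3,
    early_stopping=True,        # hold out part of the training data...
    validation_fraction=0.1,    # ...this fraction of it...
    n_iter_no_change=5,         # ...and stop after 5 stagnant epochs.
    random_state=0,
)
clf.fit(X, y)
print(clf.n_iter_)              # typically far below max_iter
```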
Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKPassiveAggressiveClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ContinueFitMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn PassiveAggressiveClassifier - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.PASSIVE_AGGRESSIVE, ], - "name": "sklearn.linear_model.passive_aggressive.PassiveAggressiveClassifier", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.passive_aggressive.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveClassifier.html']}, - "version": "2019.11.13", - "id": "85e5c88d-9eec-3452-8f2f-414f17d3e4d5", - "hyperparams_to_tune": ['C'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = PassiveAggressiveClassifier( - 
C=self.hyperparams['C'], - fit_intercept=self.hyperparams['fit_intercept'], - max_iter=self.hyperparams['max_iter'], - shuffle=self.hyperparams['shuffle'], - tol=self.hyperparams['tol'], - n_jobs=self.hyperparams['n_jobs'], - loss=self.hyperparams['loss'], - warm_start=self.hyperparams['warm_start'], - class_weight=self.hyperparams['class_weight'], - average=self.hyperparams['average'], - early_stopping=self.hyperparams['early_stopping'], - validation_fraction=self.hyperparams['validation_fraction'], - n_iter_no_change=self.hyperparams['n_iter_no_change'], - verbose=_verbose, - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def continue_fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._training_inputs is None or self._training_outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.partial_fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, 
self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - classes_=None, - _expanded_class_weight=None, - alpha=None, - epsilon=None, - eta0=None, - l1_ratio=None, - learning_rate=None, - loss_function_=None, - n_iter_=None, - penalty=None, - power_t=None, - t_=None, - average_coef_=None, - average_intercept_=None, - standard_coef_=None, - standard_intercept_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - classes_=getattr(self._clf, 'classes_', None), - _expanded_class_weight=getattr(self._clf, '_expanded_class_weight', None), - alpha=getattr(self._clf, 'alpha', None), - epsilon=getattr(self._clf, 'epsilon', None), - eta0=getattr(self._clf, 'eta0', None), - l1_ratio=getattr(self._clf, 'l1_ratio', None), - learning_rate=getattr(self._clf, 'learning_rate', None), - loss_function_=getattr(self._clf, 'loss_function_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - penalty=getattr(self._clf, 'penalty', None), - power_t=getattr(self._clf, 'power_t', None), - t_=getattr(self._clf, 't_', None), - average_coef_=getattr(self._clf, 'average_coef_', None), - average_intercept_=getattr(self._clf, 'average_intercept_', None), - standard_coef_=getattr(self._clf, 'standard_coef_', None), - standard_intercept_=getattr(self._clf, 'standard_intercept_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.classes_ = params['classes_'] - self._clf._expanded_class_weight = params['_expanded_class_weight'] - self._clf.alpha = params['alpha'] - self._clf.epsilon = params['epsilon'] - self._clf.eta0 = params['eta0'] - self._clf.l1_ratio = params['l1_ratio'] - self._clf.learning_rate = params['learning_rate'] - self._clf.loss_function_ = params['loss_function_'] - self._clf.n_iter_ = params['n_iter_'] - self._clf.penalty = params['penalty'] - self._clf.power_t = params['power_t'] - self._clf.t_ = params['t_'] - 
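set_params, continued below, restores each exported attribute onto the wrapped estimator so that produce() can run without refitting. The round trip it relies on is plain sklearn behavior: a fresh estimator with coef_, intercept_, and classes_ set back on it predicts exactly like the fitted one. A minimal sketch of that round trip (attribute list abbreviated relative to the Params class above):

```python
from sklearn.datasets import make_classification
from sklearn.linear_model import PassiveAggressiveClassifier

X, y = make_classification(n_samples=200, random_state=0)
fitted = PassiveAggressiveClassifier(max_iter=1000, tol=1e-3, random_state=0).fit(X, y)

# get_params(): export the learned attributes (subset shown).
params = {name: getattr(fitted, name, None)
          for name in ("coef_", "intercept_", "classes_", "t_")}

# set_params(): push them onto a fresh, never-fitted instance.
restored = PassiveAggressiveClassifier(max_iter=1000, tol=1e-3, random_state=0)
for name, value in params.items():
    setattr(restored, name, value)

assert (restored.predict(X) == fitted.predict(X)).all()
```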
self._clf.average_coef_ = params['average_coef_'] - self._clf.average_intercept_ = params['average_intercept_'] - self._clf.standard_coef_ = params['standard_coef_'] - self._clf.standard_intercept_ = params['standard_intercept_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['_expanded_class_weight'] is not None: - self._fitted = True - if params['alpha'] is not None: - self._fitted = True - if params['epsilon'] is not None: - self._fitted = True - if params['eta0'] is not None: - self._fitted = True - if params['l1_ratio'] is not None: - self._fitted = True - if params['learning_rate'] is not None: - self._fitted = True - if params['loss_function_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - if params['penalty'] is not None: - self._fitted = True - if params['power_t'] is not None: - self._fitted = True - if params['t_'] is not None: - self._fitted = True - if params['average_coef_'] is not None: - self._fitted = True - if params['average_intercept_'] is not None: - self._fitted = True - if params['standard_coef_'] is not None: - self._fitted = True - if params['standard_intercept_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - 
accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = 
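The semantic-type bookkeeping in `_get_target_columns_metadata` above reduces to set arithmetic: strip TrueTarget/SuggestedTarget from each output column, then add PredictedTarget plus the configured return_semantic_type. Sketched standalone (URLs copied from the code above; the example column is made up):

```python
BASE = "https://metadata.datadrivendiscovery.org/types/"
semantic_types_to_remove = {BASE + "TrueTarget", BASE + "SuggestedTarget"}
add_semantic_types = {BASE + "PredictedTarget"}  # plus return_semantic_type

column_semantic_types = {BASE + "TrueTarget", BASE + "Target"}
updated = (column_semantic_types - semantic_types_to_remove) | add_semantic_types
print(sorted(updated))  # Target kept, TrueTarget replaced by PredictedTarget
```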
"output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKPassiveAggressiveClassifier.__doc__ = PassiveAggressiveClassifier.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKPassiveAggressiveRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKPassiveAggressiveRegressor.py deleted file mode 100644 index 900de99..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKPassiveAggressiveRegressor.py +++ /dev/null @@ -1,583 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.passive_aggressive import PassiveAggressiveRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[ndarray] - t_: Optional[float] - alpha: Optional[float] - eta0: Optional[float] - l1_ratio: Optional[int] - learning_rate: Optional[str] - n_iter_: Optional[int] - penalty: Optional[float] - power_t: Optional[float] - average_coef_: Optional[ndarray] - average_intercept_: Optional[ndarray] - standard_coef_: Optional[ndarray] - standard_intercept_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - C = hyperparams.Hyperparameter[float]( - default=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. 
Defaults to True.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Hyperparameter[int]( - default=1000, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - shuffle = hyperparams.UniformBool( - default=True, - description='Whether or not the training data should be shuffled after each epoch.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.001, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='float', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - loss = hyperparams.Enumeration[str]( - values=['epsilon_insensitive', 'squared_epsilon_insensitive'], - default='epsilon_insensitive', - description='The loss function to be used: epsilon_insensitive: equivalent to PA-I in the reference paper. squared_epsilon_insensitive: equivalent to PA-II in the reference paper.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - average = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=2, - lower=2, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'bool': hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='bool', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - epsilon = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.1, - description='If the difference between the current prediction and the correct label is below this threshold, the model is not updated.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - early_stopping = hyperparams.UniformBool( - default=False, - description='Whether to use early stopping to terminate training when validation score is not improving. If set to True, it will automatically set aside a fraction of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - validation_fraction = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=1, - description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1.
Only used if early_stopping is True.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_iter_no_change = hyperparams.Bounded[int]( - default=5, - lower=0, - upper=None, - description='Number of iterations with no improvement to wait before early stopping.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKPassiveAggressiveRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ContinueFitMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn PassiveAggressiveRegressor - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.PASSIVE_AGGRESSIVE, ], - "name": "sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.passive_aggressive.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveRegressor.html']}, - "version": "2019.11.13", - "id": "50ce5919-a155-3c72-a230-f4ab4b5babba", - "hyperparams_to_tune": ['C'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = PassiveAggressiveRegressor( - C=self.hyperparams['C'], - fit_intercept=self.hyperparams['fit_intercept'], - max_iter=self.hyperparams['max_iter'], - shuffle=self.hyperparams['shuffle'], - tol=self.hyperparams['tol'], - loss=self.hyperparams['loss'], - warm_start=self.hyperparams['warm_start'], - average=self.hyperparams['average'], - epsilon=self.hyperparams['epsilon'], - early_stopping=self.hyperparams['early_stopping'], - validation_fraction=self.hyperparams['validation_fraction'], - n_iter_no_change=self.hyperparams['n_iter_no_change'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = 
False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def continue_fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._training_inputs is None or self._training_outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.partial_fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - t_=None, - alpha=None, - eta0=None, - l1_ratio=None, - learning_rate=None, - n_iter_=None, - penalty=None, - power_t=None, - average_coef_=None, - average_intercept_=None, - standard_coef_=None, - standard_intercept_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - 
target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - t_=getattr(self._clf, 't_', None), - alpha=getattr(self._clf, 'alpha', None), - eta0=getattr(self._clf, 'eta0', None), - l1_ratio=getattr(self._clf, 'l1_ratio', None), - learning_rate=getattr(self._clf, 'learning_rate', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - penalty=getattr(self._clf, 'penalty', None), - power_t=getattr(self._clf, 'power_t', None), - average_coef_=getattr(self._clf, 'average_coef_', None), - average_intercept_=getattr(self._clf, 'average_intercept_', None), - standard_coef_=getattr(self._clf, 'standard_coef_', None), - standard_intercept_=getattr(self._clf, 'standard_intercept_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.t_ = params['t_'] - self._clf.alpha = params['alpha'] - self._clf.eta0 = params['eta0'] - self._clf.l1_ratio = params['l1_ratio'] - self._clf.learning_rate = params['learning_rate'] - self._clf.n_iter_ = params['n_iter_'] - self._clf.penalty = params['penalty'] - self._clf.power_t = params['power_t'] - self._clf.average_coef_ = params['average_coef_'] - self._clf.average_intercept_ = params['average_intercept_'] - self._clf.standard_coef_ = params['standard_coef_'] - self._clf.standard_intercept_ = params['standard_intercept_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['t_'] is not None: - self._fitted = True - if params['alpha'] is not None: - self._fitted = True - if params['eta0'] is not None: - self._fitted = True - if params['l1_ratio'] is not None: - self._fitted = True - if params['learning_rate'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - if params['penalty'] is not None: - self._fitted = True - if params['power_t'] is not None: - self._fitted = True - if params['average_coef_'] is not None: - self._fitted = True - if params['average_intercept_'] is not None: - self._fitted = True - if params['standard_coef_'] is not None: - self._fitted = True - if params['standard_intercept_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - 
can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
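- # Illustrative example (values hypothetical): a target column tagged - # {'.../types/Target', '.../types/TrueTarget'} loses 'TrueTarget' (and - # 'SuggestedTarget', if present) and gains 'PredictedTarget' plus the - # configured "return_semantic_type", leaving {'.../types/Target', - # '.../types/PredictedTarget'} under the default hyperparams.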
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKPassiveAggressiveRegressor.__doc__ = PassiveAggressiveRegressor.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKPolynomialFeatures.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKPolynomialFeatures.py deleted file mode 100644 index 283adfd..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKPolynomialFeatures.py +++ /dev/null @@ -1,346 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.preprocessing.data import PolynomialFeatures - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - n_input_features_: 
Optional[int] - n_output_features_: Optional[int] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - degree = hyperparams.Hyperparameter[int]( - default=2, - description='The degree of the polynomial features. Default = 2.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - include_bias = hyperparams.UniformBool( - default=True, - description='If True (default), then include a bias column, the feature in which all polynomial powers are zero (i.e. a column of ones - acts as an intercept term in a linear model). Examples -------- >>> X = np.arange(6).reshape(3, 2) >>> X array([[0, 1], [2, 3], [4, 5]]) >>> poly = PolynomialFeatures(2) >>> poly.fit_transform(X) array([[ 1., 0., 1., 0., 0., 1.], [ 1., 2., 3., 4., 6., 9.], [ 1., 4., 5., 16., 20., 25.]]) >>> poly = PolynomialFeatures(interaction_only=True) >>> poly.fit_transform(X) array([[ 1., 0., 1., 0.], [ 1., 2., 3., 6.], [ 1., 4., 5., 20.]])', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKPolynomialFeatures(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn PolynomialFeatures - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.STATISTICAL_MOMENT_ANALYSIS, ], - "name": "sklearn.preprocessing.data.PolynomialFeatures", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.polynomial_features.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html']}, - "version": "2019.11.13", - "id": "93acb44b-532a-37d3-987a-8e61a8489d77", - "hyperparams_to_tune": ['degree'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = PolynomialFeatures( - degree=self.hyperparams['degree'], - include_bias=self.hyperparams['include_bias'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output =
self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - n_input_features_=None, - n_output_features_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - n_input_features_=getattr(self._clf, 'n_input_features_', None), - n_output_features_=getattr(self._clf, 'n_output_features_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.n_input_features_ = params['n_input_features_'] - self._clf.n_output_features_ = params['n_output_features_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['n_input_features_'] is not None: - self._fitted = True - if params['n_output_features_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in 
semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - -
return target_columns_metadata - - -SKPolynomialFeatures.__doc__ = PolynomialFeatures.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKQuadraticDiscriminantAnalysis.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKQuadraticDiscriminantAnalysis.py deleted file mode 100644 index fa90760..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKQuadraticDiscriminantAnalysis.py +++ /dev/null @@ -1,473 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - covariance_: Optional[ndarray] - means_: Optional[ndarray] - priors_: Optional[ndarray] - rotations_: Optional[Sequence[ndarray]] - scalings_: Optional[Sequence[ndarray]] - classes_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - reg_param = hyperparams.Bounded[float]( - default=0.0, - lower=0, - upper=1, - description='Regularizes the covariance estimate as ``(1-reg_param)*Sigma + reg_param*np.eye(n_features)``', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.0001, - lower=0, - upper=None, - description='Threshold used for rank estimation. .. versionadded:: 0.17', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. 
Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKQuadraticDiscriminantAnalysis(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn QuadraticDiscriminantAnalysis - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.QUADRATIC_DISCRIMINANT_ANALYSIS, ], - "name": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.quadratic_discriminant_analysis.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.html']}, - "version": "2019.11.13", - "id": "321dbf4d-07d9-3274-bd1b-2751520ed1d7", - "hyperparams_to_tune": ['reg_param'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( -
git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = QuadraticDiscriminantAnalysis( - reg_param=self.hyperparams['reg_param'], - tol=self.hyperparams['tol'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - covariance_=None, - means_=None, - priors_=None, - 
rotations_=None, - scalings_=None, - classes_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - covariance_=getattr(self._clf, 'covariance_', None), - means_=getattr(self._clf, 'means_', None), - priors_=getattr(self._clf, 'priors_', None), - rotations_=getattr(self._clf, 'rotations_', None), - scalings_=getattr(self._clf, 'scalings_', None), - classes_=getattr(self._clf, 'classes_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.covariance_ = params['covariance_'] - self._clf.means_ = params['means_'] - self._clf.priors_ = params['priors_'] - self._clf.rotations_ = params['rotations_'] - self._clf.scalings_ = params['scalings_'] - self._clf.classes_ = params['classes_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['covariance_'] is not None: - self._fitted = True - if params['means_'] is not None: - self._fitted = True - if params['priors_'] is not None: - self._fitted = True - if params['rotations_'] is not None: - self._fitted = True - if params['scalings_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. 
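- # Note: log_proba[k] has shape (n_samples, n_classes), so indexing it with - # (numpy.arange(samples_length), mapped_outputs_column) picks element - # [i, mapped_outputs_column[i]] for every row i, i.e. each sample's - # log-probability under its labelled class, as a 1-D array.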
- log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, 
hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKQuadraticDiscriminantAnalysis.__doc__ = QuadraticDiscriminantAnalysis.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKQuantileTransformer.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKQuantileTransformer.py deleted file mode 100644 index e077dd2..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKQuantileTransformer.py +++ /dev/null @@ -1,364 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.preprocessing.data import QuantileTransformer - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import 
utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - quantiles_: Optional[ndarray] - references_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_quantiles = hyperparams.UniformInt( - default=1000, - lower=100, - upper=10000, - description='Number of quantiles to be computed. It corresponds to the number of landmarks used to discretize the cumulative distribution function.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - output_distribution = hyperparams.Enumeration[str]( - default='uniform', - values=['uniform', 'normal'], - description='Marginal distribution for the transformed data. The choices are \'uniform\' (default) or \'normal\'.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - ignore_implicit_zeros = hyperparams.UniformBool( - default=False, - description='Only applies to sparse matrices. If True, the sparse entries of the matrix are discarded to compute the quantile statistics. If False, these entries are treated as zeros.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - subsample = hyperparams.Bounded[float]( - default=100000.0, - lower=1000.0, - upper=100000.0, - description='Maximum number of samples used to estimate the quantiles for computational efficiency. Note that the subsampling procedure may differ for value-identical sparse and dense matrices.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. 
Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKQuantileTransformer(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn QuantileTransformer - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.DATA_CONVERSION, ], - "name": "sklearn.preprocessing.data.QuantileTransformer", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.quantile_transformer.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html']}, - "version": "2019.11.13", - "id": "54c5e71f-0909-400b-ae65-b33631e7648f", - "hyperparams_to_tune": ['n_quantiles', 'output_distribution'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = QuantileTransformer( - n_quantiles=self.hyperparams['n_quantiles'], - output_distribution=self.hyperparams['output_distribution'], - ignore_implicit_zeros=self.hyperparams['ignore_implicit_zeros'], - subsample=self.hyperparams['subsample'], - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices =
self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - quantiles_=None, - references_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - quantiles_=getattr(self._clf, 'quantiles_', None), - references_=getattr(self._clf, 'references_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.quantiles_ = params['quantiles_'] - self._clf.references_ = params['references_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['quantiles_'] is not None: - self._fitted = True - if params['references_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod 
- def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = 
semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKQuantileTransformer.__doc__ = QuantileTransformer.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKRBFSampler.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKRBFSampler.py deleted file mode 100644 index 03cd11c..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKRBFSampler.py +++ /dev/null @@ -1,349 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.kernel_approximation import RBFSampler - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - random_weights_: Optional[ndarray] - random_offset_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - gamma = hyperparams.Hyperparameter[float]( - default=1, - description='Parameter of RBF kernel: exp(-gamma * x^2)', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_components = hyperparams.Bounded[int]( - lower=0, - upper=None, - default=100, - description='Number of Monte Carlo samples per original feature. Equals the dimensionality of the computed feature space.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. 
Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKRBFSampler(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn RBFSampler - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.KERNEL_METHOD, ], - "name": "sklearn.kernel_approximation.RBFSampler", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.rbf_sampler.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.kernel_approximation.RBFSampler.html']}, - "version": "2019.11.13", - "id": "0823123d-45a3-3dc8-9ef1-ff643236993a", - "hyperparams_to_tune": ['gamma', 'n_components'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = RBFSampler( - gamma=self.hyperparams['gamma'], - n_components=self.hyperparams['n_components'], - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None 
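Aside: the constructor above forwards `gamma`, `n_components`, and the primitive's `random_seed` straight into sklearn's `RBFSampler`. A minimal sketch of what that underlying estimator computes on plain numpy data, outside the d3m plumbing (array shapes and values here are illustrative assumptions, not taken from this patch):

    import numpy as np
    from sklearn.kernel_approximation import RBFSampler

    X = np.random.RandomState(0).rand(20, 4)           # 20 samples, 4 numeric features
    sampler = RBFSampler(gamma=1.0, n_components=100,   # mirrors the hyperparam defaults above
                         random_state=0)
    features = sampler.fit_transform(X)                 # Monte Carlo feature map approximating the RBF kernel
    print(features.shape)                               # (20, 100): one column per Monte Carlo component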
- self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - random_weights_=None, - random_offset_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - random_weights_=getattr(self._clf, 'random_weights_', None), - random_offset_=getattr(self._clf, 'random_offset_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.random_weights_ = params['random_weights_'] - self._clf.random_offset_ = params['random_offset_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['random_weights_'] is not None: - self._fitted = True - if params['random_offset_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - 
return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
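Aside on `_can_produce_column` above: a column is accepted only when its structural type is numeric and every accepted semantic type is present, expressed as a set difference. The test reads backwards at first glance, so a minimal illustration (URIs copied from the method above; column contents are illustrative):

    accepted = {"https://metadata.datadrivendiscovery.org/types/Attribute"}
    column_a = {"https://metadata.datadrivendiscovery.org/types/Attribute",
                "http://schema.org/Float"}
    column_b = {"http://schema.org/Float"}

    assert len(accepted - column_a) == 0   # all accepted types present -> column is usable
    assert len(accepted - column_b) == 1   # Attribute type missing -> column is skipped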
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKRBFSampler.__doc__ = RBFSampler.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomForestClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomForestClassifier.py deleted file mode 100644 index ddef232..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomForestClassifier.py +++ /dev/null @@ -1,682 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy 
import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.ensemble.forest import RandomForestClassifier - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - estimators_: Optional[List[sklearn.tree.DecisionTreeClassifier]] - classes_: Optional[Union[ndarray, List[ndarray]]] - n_classes_: Optional[Union[int, List[int]]] - n_features_: Optional[int] - n_outputs_: Optional[int] - oob_score_: Optional[float] - oob_decision_function_: Optional[ndarray] - base_estimator_: Optional[object] - estimator_params: Optional[tuple] - base_estimator: Optional[object] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_estimators = hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='The number of trees in the forest.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - criterion = hyperparams.Enumeration[str]( - values=['gini', 'entropy'], - default='gini', - description='The function to measure the quality of a split. Supported criteria are "gini" for the Gini impurity and "entropy" for the information gain. Note: this parameter is tree-specific.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'specified_int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'calculated': hyperparams.Enumeration[str]( - values=['auto', 'sqrt', 'log2'], - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Uniform( - default=0.25, - lower=0, - upper=1, - lower_inclusive=True, - upper_inclusive=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='calculated', - description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=sqrt(n_features)`. - If "sqrt", then `max_features=sqrt(n_features)` (same as "auto"). - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. 
Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_depth = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=10, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_split = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=2, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Uniform( - default=0.25, - lower=0, - upper=1, - lower_inclusive=False, - upper_inclusive=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_leaf = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Uniform( - default=0.25, - lower=0, - upper=0.5, - lower_inclusive=False, - upper_inclusive=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_weight_fraction_leaf = hyperparams.Uniform( - default=0, - lower=0, - upper=0.5, - upper_inclusive=True, - description='The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. 
Samples have equal weight when sample_weight is not provided.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_leaf_nodes = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=10, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_impurity_decrease = hyperparams.Bounded[float]( - default=0.0, - lower=0.0, - upper=None, - description='A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19 ', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - bootstrap = hyperparams.Enumeration[str]( - values=['bootstrap', 'bootstrap_with_oob_score', 'disabled'], - default='bootstrap', - description='Whether bootstrap samples are used when building trees.' - ' And whether to use out-of-bag samples to estimate the generalization accuracy.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of jobs to run in parallel for both `fit` and `predict`. If -1, then the number of jobs is set to the number of cores.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - class_weight = hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Enumeration[str]( - default='balanced', - values=['balanced', 'balanced_subsample'], - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='"balanced_subsample" or None, optional (default=None) Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. 
For multi-output problems, a list of dicts can be provided in the same order as the columns of y. The "balanced" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` The "balanced_subsample" mode is the same as "balanced" except that weights are computed based on the bootstrap sample for every tree grown. For multi-output, the weights of each column of y will be multiplied. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKRandomForestClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ProbabilisticCompositionalityMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn RandomForestClassifier - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.RANDOM_FOREST, ], - "name": "sklearn.ensemble.forest.RandomForestClassifier", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.random_forest.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html']}, - "version": "2019.11.13", - "id": "1dd82833-5692-39cb-84fb-2455683075f3", - "hyperparams_to_tune": ['n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'max_features'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = RandomForestClassifier( - n_estimators=self.hyperparams['n_estimators'], - criterion=self.hyperparams['criterion'], - max_features=self.hyperparams['max_features'], - max_depth=self.hyperparams['max_depth'], - min_samples_split=self.hyperparams['min_samples_split'], - min_samples_leaf=self.hyperparams['min_samples_leaf'], - min_weight_fraction_leaf=self.hyperparams['min_weight_fraction_leaf'], - max_leaf_nodes=self.hyperparams['max_leaf_nodes'], - min_impurity_decrease=self.hyperparams['min_impurity_decrease'], - bootstrap=self.hyperparams['bootstrap'] in ['bootstrap', 'bootstrap_with_oob_score'], - oob_score=self.hyperparams['bootstrap'] in ['bootstrap_with_oob_score'], - n_jobs=self.hyperparams['n_jobs'], - warm_start=self.hyperparams['warm_start'], - class_weight=self.hyperparams['class_weight'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - 
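One detail worth noting in the constructor above: the single three-valued `bootstrap` enumeration is unpacked into sklearn's two boolean arguments, `bootstrap` and `oob_score`. A small sketch of that mapping (the helper name is ours, for illustration only; the two expressions are copied from the constructor):

    def bootstrap_flags(choice: str) -> dict:
        # Mirrors the two membership tests in the constructor above.
        return {
            'bootstrap': choice in ('bootstrap', 'bootstrap_with_oob_score'),
            'oob_score': choice == 'bootstrap_with_oob_score',
        }

    assert bootstrap_flags('disabled') == {'bootstrap': False, 'oob_score': False}
    assert bootstrap_flags('bootstrap') == {'bootstrap': True, 'oob_score': False}
    assert bootstrap_flags('bootstrap_with_oob_score') == {'bootstrap': True, 'oob_score': True}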
self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - estimators_=None, - classes_=None, - n_classes_=None, - n_features_=None, - n_outputs_=None, - oob_score_=None, - oob_decision_function_=None, - base_estimator_=None, - estimator_params=None, - base_estimator=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - estimators_=getattr(self._clf, 'estimators_', None), - classes_=getattr(self._clf, 'classes_', None), - n_classes_=getattr(self._clf, 'n_classes_', None), - n_features_=getattr(self._clf, 'n_features_', None), - n_outputs_=getattr(self._clf, 'n_outputs_', None), - oob_score_=getattr(self._clf, 'oob_score_', None), - oob_decision_function_=getattr(self._clf, 'oob_decision_function_', None), - base_estimator_=getattr(self._clf, 'base_estimator_', None), - estimator_params=getattr(self._clf, 'estimator_params', None), - 
base_estimator=getattr(self._clf, 'base_estimator', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.estimators_ = params['estimators_'] - self._clf.classes_ = params['classes_'] - self._clf.n_classes_ = params['n_classes_'] - self._clf.n_features_ = params['n_features_'] - self._clf.n_outputs_ = params['n_outputs_'] - self._clf.oob_score_ = params['oob_score_'] - self._clf.oob_decision_function_ = params['oob_decision_function_'] - self._clf.base_estimator_ = params['base_estimator_'] - self._clf.estimator_params = params['estimator_params'] - self._clf.base_estimator = params['base_estimator'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['estimators_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['n_classes_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['n_outputs_'] is not None: - self._fitted = True - if params['oob_score_'] is not None: - self._fitted = True - if params['oob_decision_function_'] is not None: - self._fitted = True - if params['base_estimator_'] is not None: - self._fitted = True - if params['estimator_params'] is not None: - self._fitted = True - if params['base_estimator'] is not None: - self._fitted = True - - - def log_likelihoods(self, *, - outputs: Outputs, - inputs: Inputs, - timeout: float = None, - iterations: int = None) -> CallResult[Sequence[float]]: - inputs = inputs.iloc[:, self._training_indices] # Get ndarray - outputs = outputs.iloc[:, self._target_column_indices] - - if len(inputs.columns) and len(outputs.columns): - - if outputs.shape[1] != self._clf.n_outputs_: - raise exceptions.InvalidArgumentValueError("\"outputs\" argument does not have the correct number of target columns.") - - log_proba = self._clf.predict_log_proba(inputs) - - # Making it always a list, even when only one target. - if self._clf.n_outputs_ == 1: - log_proba = [log_proba] - classes = [self._clf.classes_] - else: - classes = self._clf.classes_ - - samples_length = inputs.shape[0] - - log_likelihoods = [] - for k in range(self._clf.n_outputs_): - # We have to map each class to its internal (numerical) index used in the learner. - # This allows "outputs" to contain string classes. - outputs_column = outputs.iloc[:, k] - classes_map = pandas.Series(numpy.arange(len(classes[k])), index=classes[k]) - mapped_outputs_column = outputs_column.map(classes_map) - - # For each target column (column in "outputs"), for each sample (row) we pick the log - # likelihood for a given class. 
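The class-to-index mapping built in the loop above is the subtle step: `classes_map` inverts the learner's `classes_` array so that string labels in `outputs` can index into the `predict_log_proba` result. In isolation, with illustrative values (not from this patch):

    import numpy as np
    import pandas as pd

    classes = np.array(['cat', 'dog'])                  # one target's classes_ from the learner
    outputs_column = pd.Series(['dog', 'cat', 'dog'])   # ground-truth labels as strings
    classes_map = pd.Series(np.arange(len(classes)), index=classes)
    mapped = outputs_column.map(classes_map)            # -> 1, 0, 1 (positional class indices)

    log_proba = np.log([[0.3, 0.7],
                        [0.6, 0.4],
                        [0.2, 0.8]])                    # predict_log_proba output for 3 samples
    picked = log_proba[np.arange(3), mapped]            # log-likelihood of each row's true class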
- log_likelihoods.append(log_proba[k][numpy.arange(samples_length), mapped_outputs_column]) - - results = d3m_dataframe(dict(enumerate(log_likelihoods)), generate_metadata=True) - results.columns = outputs.columns - - for k in range(self._clf.n_outputs_): - column_metadata = outputs.metadata.query_column(k) - if 'name' in column_metadata: - results.metadata = results.metadata.update_column(k, {'name': column_metadata['name']}) - - else: - results = d3m_dataframe(generate_metadata=True) - - return CallResult(results) - - - - def produce_feature_importances(self, *, timeout: float = None, iterations: int = None) -> CallResult[d3m_dataframe]: - output = d3m_dataframe(self._clf.feature_importances_.reshape((1, len(self._input_column_names)))) - output.columns = self._input_column_names - for i in range(len(self._input_column_names)): - output.metadata = output.metadata.update_column(i, {"name": self._input_column_names[i]}) - return CallResult(output) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 
'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKRandomForestClassifier.__doc__ = RandomForestClassifier.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomForestRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomForestRegressor.py deleted file mode 100644 index 181105a..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomForestRegressor.py +++ /dev/null @@ -1,609 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from 
numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.ensemble.forest import RandomForestRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - estimators_: Optional[List[sklearn.tree.DecisionTreeRegressor]] - n_features_: Optional[int] - n_outputs_: Optional[int] - oob_score_: Optional[float] - oob_prediction_: Optional[ndarray] - base_estimator_: Optional[object] - estimator_params: Optional[tuple] - base_estimator: Optional[object] - class_weight: Optional[Union[str, dict, List[dict]]] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_estimators = hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='The number of trees in the forest.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - criterion = hyperparams.Enumeration[str]( - values=['mse', 'mae'], - default='mse', - description='The function to measure the quality of a split. Supported criteria are "mse" for the mean squared error, which is equal to variance reduction as feature selection criterion, and "mae" for the mean absolute error. .. versionadded:: 0.18 Mean Absolute Error (MAE) criterion.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'specified_int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'calculated': hyperparams.Enumeration[str]( - values=['auto', 'sqrt', 'log2'], - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Uniform( - default=0.25, - lower=0, - upper=1, - lower_inclusive=True, - upper_inclusive=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='calculated', - description='The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a percentage and `int(max_features * n_features)` features are considered at each split. - If "auto", then `max_features=n_features`. - If "sqrt", then `max_features=sqrt(n_features)`. - If "log2", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. 
Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_depth = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=10, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_split = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=2, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Uniform( - default=0.25, - lower=0, - upper=1, - lower_inclusive=False, - upper_inclusive=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_leaf = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'percent': hyperparams.Uniform( - default=0.25, - lower=0, - upper=0.5, - lower_inclusive=False, - upper_inclusive=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_weight_fraction_leaf = hyperparams.Uniform( - default=0, - lower=0, - upper=0.5, - upper_inclusive=True, - description='The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. 
Samples have equal weight when sample_weight is not provided.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_leaf_nodes = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=10, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_impurity_decrease = hyperparams.Bounded[float]( - default=0.0, - lower=0.0, - upper=None, - description='A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following:: N_t / N * (impurity - N_t_R / N_t * right_impurity - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19 ', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - bootstrap = hyperparams.Enumeration[str]( - values=['bootstrap', 'bootstrap_with_oob_score', 'disabled'], - default='bootstrap', - description='Whether bootstrap samples are used when building trees.' - ' And whether to use out-of-bag samples to estimate the generalization accuracy.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of jobs to run in parallel for both `fit` and `predict`. If -1, then the number of jobs is set to the number of cores.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. 
If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKRandomForestRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn RandomForestRegressor - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.RANDOM_FOREST, ], - "name": "sklearn.ensemble.forest.RandomForestRegressor", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.random_forest.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html']}, - "version": "2019.11.13", - "id": "f0fd7a62-09b5-3abc-93bb-f5f999f7cc80", - "hyperparams_to_tune": ['n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'max_features'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # Construct the wrapped sklearn estimator from the hyperparams. - self._clf = RandomForestRegressor( - n_estimators=self.hyperparams['n_estimators'], - criterion=self.hyperparams['criterion'], - max_features=self.hyperparams['max_features'], - max_depth=self.hyperparams['max_depth'], - min_samples_split=self.hyperparams['min_samples_split'], - min_samples_leaf=self.hyperparams['min_samples_leaf'], - min_weight_fraction_leaf=self.hyperparams['min_weight_fraction_leaf'], - max_leaf_nodes=self.hyperparams['max_leaf_nodes'], - min_impurity_decrease=self.hyperparams['min_impurity_decrease'], - bootstrap=self.hyperparams['bootstrap'] in ['bootstrap', 'bootstrap_with_oob_score'], - oob_score=self.hyperparams['bootstrap'] in ['bootstrap_with_oob_score'], - n_jobs=self.hyperparams['n_jobs'], - warm_start=self.hyperparams['warm_start'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: -
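The three-valued ``bootstrap`` enumeration above is collapsed into sklearn's two boolean arguments in ``__init__``. A minimal standalone sketch of that mapping, assuming only scikit-learn is installed; the ``make_forest`` helper is hypothetical and not part of the wrapper:

    # Sketch: how the 'bootstrap' hyperparam maps onto sklearn's
    # bootstrap/oob_score flags (values as in the Hyperparams class above).
    from sklearn.ensemble import RandomForestRegressor

    def make_forest(bootstrap_mode: str) -> RandomForestRegressor:
        # 'disabled'                 -> bootstrap=False, oob_score=False
        # 'bootstrap'                -> bootstrap=True,  oob_score=False
        # 'bootstrap_with_oob_score' -> bootstrap=True,  oob_score=True
        return RandomForestRegressor(
            bootstrap=bootstrap_mode in ('bootstrap', 'bootstrap_with_oob_score'),
            oob_score=bootstrap_mode == 'bootstrap_with_oob_score',
        )

    clf = make_forest('bootstrap_with_oob_score')
    assert clf.bootstrap is True and clf.oob_score is True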
if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - estimators_=None, - n_features_=None, - n_outputs_=None, - oob_score_=None, - oob_prediction_=None, - base_estimator_=None, - estimator_params=None, - base_estimator=None, - class_weight=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - estimators_=getattr(self._clf, 'estimators_', None), - n_features_=getattr(self._clf, 'n_features_', None), - n_outputs_=getattr(self._clf, 'n_outputs_', None), - oob_score_=getattr(self._clf, 'oob_score_', None), - oob_prediction_=getattr(self._clf, 'oob_prediction_', None), - base_estimator_=getattr(self._clf, 'base_estimator_', None), - estimator_params=getattr(self._clf, 'estimator_params', None), - base_estimator=getattr(self._clf, 'base_estimator', None), - class_weight=getattr(self._clf, 'class_weight', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - 
target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.estimators_ = params['estimators_'] - self._clf.n_features_ = params['n_features_'] - self._clf.n_outputs_ = params['n_outputs_'] - self._clf.oob_score_ = params['oob_score_'] - self._clf.oob_prediction_ = params['oob_prediction_'] - self._clf.base_estimator_ = params['base_estimator_'] - self._clf.estimator_params = params['estimator_params'] - self._clf.base_estimator = params['base_estimator'] - self._clf.class_weight = params['class_weight'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['estimators_'] is not None: - self._fitted = True - if params['n_features_'] is not None: - self._fitted = True - if params['n_outputs_'] is not None: - self._fitted = True - if params['oob_score_'] is not None: - self._fitted = True - if params['oob_prediction_'] is not None: - self._fitted = True - if params['base_estimator_'] is not None: - self._fitted = True - if params['estimator_params'] is not None: - self._fitted = True - if params['base_estimator'] is not None: - self._fitted = True - if params['class_weight'] is not None: - self._fitted = True - - - - - - def produce_feature_importances(self, *, timeout: float = None, iterations: int = None) -> CallResult[d3m_dataframe]: - output = d3m_dataframe(self._clf.feature_importances_.reshape((1, len(self._input_column_names)))) - output.columns = self._input_column_names - for i in range(len(self._input_column_names)): - output.metadata = output.metadata.update_column(i, {"name": self._input_column_names[i]}) - return CallResult(output) - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: 
d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() 
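The metadata helpers above reduce to simple set arithmetic on a column's semantic types: the true/suggested-target markers are stripped and the predicted-target marker (plus ``return_semantic_type``) is added. A self-contained sketch with illustrative values:

    # Sketch of the semantic-type rewrite in _get_target_columns_metadata.
    TRUE_TARGET = 'https://metadata.datadrivendiscovery.org/types/TrueTarget'
    SUGGESTED_TARGET = 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget'
    PREDICTED_TARGET = 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'

    # Illustrative semantic types of a training target column.
    semantic_types = {TRUE_TARGET, SUGGESTED_TARGET, 'http://schema.org/Float'}

    rewritten = (semantic_types - {TRUE_TARGET, SUGGESTED_TARGET}) | {PREDICTED_TARGET}
    assert rewritten == {'http://schema.org/Float', PREDICTED_TARGET}  # unrelated types survive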
- semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKRandomForestRegressor.__doc__ = RandomForestRegressor.__doc__ diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomTreesEmbedding.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomTreesEmbedding.py deleted file mode 100644 index c4f7adf..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKRandomTreesEmbedding.py +++ /dev/null @@ -1,482 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.ensemble.forest import RandomTreesEmbedding - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - estimators_: Optional[Sequence[sklearn.base.BaseEstimator]] - one_hot_encoder_: Optional[object] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_estimators = hyperparams.Bounded[int]( - default=10, - lower=1, - upper=None, - description='Number of trees in the forest.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_depth = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=5, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='int', - description='The maximum depth of each tree. 
If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_split = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=1, - default=1.0, - description='It\'s a percentage and `ceil(min_samples_split * n_samples)` is the minimum number of samples for each split.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=2, - description='Minimum number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='int', - description='The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a percentage and `ceil(min_samples_split * n_samples)` is the minimum number of samples for each split. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_samples_leaf = hyperparams.Union( - configuration=OrderedDict({ - 'percent': hyperparams.Bounded[float]( - lower=0, - upper=0.5, - default=0.25, - description='It\'s a percentage and `ceil(min_samples_leaf * n_samples)` is the minimum number of samples for each node.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'absolute': hyperparams.Bounded[int]( - lower=1, - upper=None, - default=1, - description='Minimum number.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='The minimum number of samples required to be at a leaf node: - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a percentage and `ceil(min_samples_leaf * n_samples)` is the minimum number of samples for each node. .. versionchanged:: 0.18 Added float values for percentages.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_weight_fraction_leaf = hyperparams.Bounded[float]( - default=0, - lower=0, - upper=0.5, - description='The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_leaf_nodes = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=10, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. 
If None then unlimited number of leaf nodes.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_impurity_split = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. versionadded:: 0.18', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_impurity_decrease = hyperparams.Bounded[float]( - default=0, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of jobs to run in parallel for both `fit` and `predict`. If -1, then the number of jobs is set to the number of cores.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble, otherwise, just fit a whole new forest.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. 
Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKRandomTreesEmbedding(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn RandomTreesEmbedding - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomTreesEmbedding.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.RANDOM_FOREST, ], - "name": "sklearn.ensemble.forest.RandomTreesEmbedding", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.random_trees_embedding.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomTreesEmbedding.html']}, - "version": "2019.11.13", - "id": "8889ff47-1d2e-3a80-bdef-8397a95e1c6e", - "hyperparams_to_tune": ['n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # Construct the wrapped sklearn estimator from the hyperparams. - self._clf = RandomTreesEmbedding( - n_estimators=self.hyperparams['n_estimators'], - max_depth=self.hyperparams['max_depth'], - min_samples_split=self.hyperparams['min_samples_split'], - min_samples_leaf=self.hyperparams['min_samples_leaf'], - min_weight_fraction_leaf=self.hyperparams['min_weight_fraction_leaf'], - max_leaf_nodes=self.hyperparams['max_leaf_nodes'], - min_impurity_split=self.hyperparams['min_impurity_split'], - min_impurity_decrease=self.hyperparams['min_impurity_decrease'], - n_jobs=self.hyperparams['n_jobs'], - warm_start=self.hyperparams['warm_start'], - random_state=self.random_seed, - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return
CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - estimators_=None, - one_hot_encoder_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - estimators_=getattr(self._clf, 'estimators_', None), - one_hot_encoder_=getattr(self._clf, 'one_hot_encoder_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.estimators_ = params['estimators_'] - self._clf.one_hot_encoder_ = params['one_hot_encoder_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['estimators_'] is not None: - self._fitted = True - if params['one_hot_encoder_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - 
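``_get_columns_to_fit`` above delegates the use/exclude bookkeeping to ``base_utils.get_columns_to_use``. A plain-Python sketch of that selection logic under simplified assumptions (``select_columns`` is a hypothetical stand-in, not the d3m API):

    from typing import Callable, List, Sequence

    def select_columns(n_columns: int,
                       use: Sequence[int],
                       exclude: Sequence[int],
                       can_use: Callable[[int], bool]) -> List[int]:
        # Explicit 'use' indices win; otherwise start from all columns
        # minus the excluded ones, then keep what the predicate accepts.
        candidates = list(use) if use else [
            i for i in range(n_columns) if i not in set(exclude)
        ]
        return [i for i in candidates if can_use(i)]

    # Keep even-indexed columns out of 5, excluding column 2:
    assert select_columns(5, use=(), exclude=(2,), can_use=lambda i: i % 2 == 0) == [0, 4]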
return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) -
add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKRandomTreesEmbedding.__doc__ = RandomTreesEmbedding.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKRidge.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKRidge.py deleted file mode 100644 index 3ca48ef..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKRidge.py +++ /dev/null @@ -1,444 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.ridge import Ridge - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[Union[float, ndarray]] - n_iter_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - alpha = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=1, - description='Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``C^-1`` in other linear models such as LogisticRegression or LinearSVC. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number. copy_X : boolean, optional, default True If True, X will be copied; else, it may be overwritten.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (e.g. 
data is expected to be already centered).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - normalize = hyperparams.UniformBool( - default=False, - description='If True, the regressors X will be normalized before regression. This parameter is ignored when `fit_intercept` is set to False. When the regressors are normalized, note that this makes the hyperparameters learnt more robust and almost independent of the number of samples. The same property is not valid for standardized data. However, if you wish to standardize, please use `preprocessing.StandardScaler` before calling `fit` on an estimator with `normalize=False`.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=1000, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Maximum number of iterations for conjugate gradient solver. For \'sparse_cg\' and \'lsqr\' solvers, the default value is determined by scipy.sparse.linalg. For \'sag\' solver, the default value is 1000.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.001, - lower=0, - upper=None, - description='Precision of the solution.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - solver = hyperparams.Enumeration[str]( - values=['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'], - default='auto', - description='Solver to use in the computational routines: - \'auto\' chooses the solver automatically based on the type of data. - \'svd\' uses a Singular Value Decomposition of X to compute the Ridge coefficients. More stable for singular matrices than \'cholesky\'. - \'cholesky\' uses the standard scipy.linalg.solve function to obtain a closed-form solution. - \'sparse_cg\' uses the conjugate gradient solver as found in scipy.sparse.linalg.cg. As an iterative algorithm, this solver is more appropriate than \'cholesky\' for large-scale data (possibility to set `tol` and `max_iter`). - \'lsqr\' uses the dedicated regularized least-squares routine scipy.sparse.linalg.lsqr. It is the fastest but may not be available in old scipy versions. It also uses an iterative procedure. - \'sag\' uses a Stochastic Average Gradient descent. It also uses an iterative procedure, and is often faster than other solvers when both n_samples and n_features are large. Note that \'sag\' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing. All last four solvers support both dense and sparse data. However, only \'sag\' supports sparse input when `fit_intercept` is True. .. versionadded:: 0.17 Stochastic Average Gradient descent solver.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. 
If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKRidge(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn Ridge - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.TIKHONOV_REGULARIZATION, ], - "name": "sklearn.linear_model.ridge.Ridge", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.ridge.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html']}, - "version": "2019.11.13", - "id": "2fb16403-8509-3f02-bdbf-9696e2fcad55", - "hyperparams_to_tune": ['alpha', 'max_iter'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # Construct the wrapped sklearn estimator from the hyperparams. - self._clf = Ridge( - alpha=self.hyperparams['alpha'], - fit_intercept=self.hyperparams['fit_intercept'], - normalize=self.hyperparams['normalize'], - max_iter=self.hyperparams['max_iter'], - tol=self.hyperparams['tol'], - solver=self.hyperparams['solver'], - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata,
self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - n_iter_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.n_iter_ = params['n_iter_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - 
use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
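``_can_produce_column`` above applies a two-part test: the column's structural type must be numeric, and its semantic types must include the Attribute marker. A minimal sketch with plain dicts standing in for d3m column metadata:

    import numpy

    ACCEPTED_STRUCTURAL = (int, float, numpy.integer, numpy.float64)
    ATTRIBUTE = 'https://metadata.datadrivendiscovery.org/types/Attribute'

    def can_produce(column_metadata: dict) -> bool:
        # Numeric structural type and the Attribute semantic type required.
        if not issubclass(column_metadata['structural_type'], ACCEPTED_STRUCTURAL):
            return False
        return ATTRIBUTE in set(column_metadata.get('semantic_types', []))

    assert can_produce({'structural_type': float, 'semantic_types': [ATTRIBUTE]})
    assert not can_produce({'structural_type': str, 'semantic_types': [ATTRIBUTE]})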
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKRidge.__doc__ = Ridge.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKRobustScaler.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKRobustScaler.py deleted file mode 100644 index 6b98060..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKRobustScaler.py +++ /dev/null @@ -1,354 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.preprocessing.data import RobustScaler - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - center_: Optional[ndarray] - scale_: Optional[ndarray] - input_column_names: Optional[Any] - 
target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - with_centering = hyperparams.UniformBool( - default=True, - description='If True, center the data before scaling. This will cause ``transform`` to raise an exception when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - with_scaling = hyperparams.UniformBool( - default=True, - description='If True, scale the data to interquartile range.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - quantile_range = hyperparams.SortedSet( - elements=hyperparams.Uniform(0.0, 100.0, 50.0, lower_inclusive=False, upper_inclusive=False), - default=(25.0, 75.0), - min_size=2, - max_size=2, - description='Default: (25.0, 75.0) = (1st quantile, 3rd quantile) = IQR Quantile range used to calculate ``scale_``. .. versionadded:: 0.18', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
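# A minimal usage sketch for the hyper-parameters defined above, assuming the
# d3m Hyperparams.defaults()/replace() API; the override values are illustrative.
hp = Hyperparams.defaults().replace({
    'quantile_range': (10.0, 90.0),  # widen the quantile range used for scaling
    'use_semantic_types': True,      # select columns via column metadata
    'return_result': 'replace',      # write scaled values over the originals
})
assert hp['with_centering'] is True  # unspecified values keep their defaults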
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKRobustScaler(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn RobustScaler - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.FEATURE_SCALING, ], - "name": "sklearn.preprocessing.data.RobustScaler", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.robust_scaler.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html']}, - "version": "2019.11.13", - "id": "854727ed-c82c-3137-ac59-fd52bc9ba385", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - self._clf = RobustScaler( - with_centering=self.hyperparams['with_centering'], - with_scaling=self.hyperparams['with_scaling'], - quantile_range=self.hyperparams['quantile_range'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - - if self._training_inputs is None: - return CallResult(None) - - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = 
self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - center_=None, - scale_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - center_=getattr(self._clf, 'center_', None), - scale_=getattr(self._clf, 'scale_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.center_ = params['center_'] - self._clf.scale_ = params['scale_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['center_'] is not None: - self._fitted = True - if params['scale_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def 
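# A minimal end-to-end sketch of the fit/produce cycle implemented above,
# assuming the surrounding module's names; the tiny DataFrame is invented.
train_df = d3m_dataframe({'a': [1.0, 2.0, 100.0], 'b': [0.5, 0.6, 0.7]}, generate_metadata=True)
scaler = SKRobustScaler(hyperparams=Hyperparams.defaults())
scaler.set_training_data(inputs=train_df)
scaler.fit()                                    # fits the wrapped sklearn RobustScaler
scaled = scaler.produce(inputs=train_df).value  # produce() returns a CallResult
# With the defaults (use_semantic_types=False, return_result='new'), 'scaled'
# holds only the robust-scaled columns.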
_get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKRobustScaler.__doc__ = RobustScaler.__doc__ \ No newline at end of file diff --git
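# A minimal, self-contained sketch of what the wrapped estimator computes:
# center_ holds per-column medians, scale_ per-column IQRs for quantile_range.
import numpy
from sklearn.preprocessing import RobustScaler

X = numpy.array([[1.0], [2.0], [3.0], [4.0], [100.0]])
scaler = RobustScaler(quantile_range=(25.0, 75.0)).fit(X)
print(scaler.center_)             # [3.] -- the median is unaffected by the 100.0 outlier
print(scaler.scale_)              # [2.] -- IQR = 4.0 - 2.0
print(scaler.transform([[3.0]]))  # [[0.]] -- the median maps to zero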
a/common-primitives/sklearn-wrap/sklearn_wrap/SKSGDClassifier.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKSGDClassifier.py deleted file mode 100644 index e5f0422..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKSGDClassifier.py +++ /dev/null @@ -1,661 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.linear_model.stochastic_gradient import SGDClassifier - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[ndarray] - n_iter_: Optional[int] - loss_function_: Optional[object] - classes_: Optional[ndarray] - _expanded_class_weight: Optional[ndarray] - t_: Optional[float] - C: Optional[float] - average_coef_: Optional[ndarray] - average_intercept_: Optional[ndarray] - standard_coef_: Optional[ndarray] - standard_intercept_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - loss = hyperparams.Enumeration[str]( - values=['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron', 'squared_loss', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'], - default='hinge', - description='The loss function to be used. Defaults to \'hinge\', which gives a linear SVM. The possible options are \'hinge\', \'log\', \'modified_huber\', \'squared_hinge\', \'perceptron\', or a regression loss: \'squared_loss\', \'huber\', \'epsilon_insensitive\', or \'squared_epsilon_insensitive\'. The \'log\' loss gives logistic regression, a probabilistic classifier. \'modified_huber\' is another smooth loss that brings tolerance to outliers as well as probability estimates. \'squared_hinge\' is like hinge but is quadratically penalized. \'perceptron\' is the linear loss used by the perceptron algorithm. The other losses are designed for regression but can be useful in classification as well; see SGDRegressor for a description.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - penalty = hyperparams.Enumeration[str]( - values=['l1', 'l2', 'elasticnet', 'none'], - default='l2', - description='The penalty (aka regularization term) to be used. Defaults to \'l2\' which is the standard regularizer for linear SVM models. 
\'l1\' and \'elasticnet\' might bring sparsity to the model (feature selection) not achievable with \'l2\'.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - alpha = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.0001, - description='Constant that multiplies the regularization term. Defaults to 0.0001 Also used to compute learning_rate when set to \'optimal\'.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - l1_ratio = hyperparams.Bounded[float]( - lower=0, - upper=1, - default=0.15, - description='The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1. l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1. Defaults to 0.15.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. Defaults to True.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - shuffle = hyperparams.UniformBool( - default=True, - description='Whether or not the training data should be shuffled after each epoch. Defaults to True.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - epsilon = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.1, - description='Epsilon in the epsilon-insensitive loss functions; only if `loss` is \'huber\', \'epsilon_insensitive\', or \'squared_epsilon_insensitive\'. For \'huber\', determines the threshold at which it becomes less important to get the prediction exactly right. For epsilon-insensitive, any differences between the current prediction and the correct label are ignored if they are less than this threshold.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_jobs = hyperparams.Union( - configuration=OrderedDict({ - 'limit': hyperparams.Bounded[int]( - default=1, - lower=1, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'all_cores': hyperparams.Constant( - default=-1, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='limit', - description='The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. -1 means \'all CPUs\'. 
Defaults to 1.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - learning_rate = hyperparams.Enumeration[str]( - values=['optimal', 'invscaling', 'constant', 'adaptive'], - default='optimal', - description='The learning rate schedule: - \'constant\': eta = eta0 - \'optimal\': eta = 1.0 / (alpha * (t + t0)) [default] - \'invscaling\': eta = eta0 / pow(t, power_t) where t0 is chosen by a heuristic proposed by Leon Bottou.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - power_t = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.5, - description='The exponent for inverse scaling learning rate [default 0.5].', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - average = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=2, - lower=2, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'bool': hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='bool', - description='When set to True, computes the averaged SGD weights and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches average. So ``average=10`` will begin averaging after seeing 10 samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - eta0 = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.0, - description='The initial learning rate for the \'constant\' or \'invscaling\' schedules. The default value is 0.0 as eta0 is not used by the default schedule \'optimal\'.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=1000, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='int', - description='The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the `partial_fit`. Defaults to 5. Defaults to 1000 from 0.21, or if tol is not None. .. versionadded:: 0.19', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.001, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='float', - description='The stopping criterion. If it is not None, the iterations will stop when (loss > previous_loss - tol). Defaults to None. Defaults to 1e-3 from 0.21. .. 
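# A minimal sketch of how these Union hyper-parameters resolve, assuming the
# d3m Hyperparams.defaults()/replace() API: whichever branch matches the given
# value, indexing yields the raw value that is forwarded to sklearn unchanged.
hp = Hyperparams.defaults().replace({
    'n_jobs': -1,      # matches the 'all_cores' branch
    'max_iter': 2000,  # matches the 'int' branch
    'tol': None,       # matches the 'none' branch, disabling the stopping criterion
})
assert hp['n_jobs'] == -1 and hp['max_iter'] == 2000 and hp['tol'] is None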
versionadded:: 0.19', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - class_weight = hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Constant( - default='balanced', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Preset for the class_weight fit parameter. Weights associated with classes. If not given, all classes are supposed to have weight one. The "balanced" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - early_stopping = hyperparams.UniformBool( - default=False, - description='Whether to use early stopping to terminate training when validation score is not improving. If set to True, it will automatically set aside a fraction of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - validation_fraction = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=1, - description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_iter_no_change = hyperparams.Bounded[int]( - default=5, - lower=0, - upper=None, - description='Number of iterations with no improvement to wait before early stopping.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. 
Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKSGDClassifier(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ContinueFitMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn SGDClassifier - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.GRADIENT_DESCENT, ], - "name": "sklearn.linear_model.stochastic_gradient.SGDClassifier", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.sgd.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html']}, - "version": "2019.11.13", - "id": "2305e400-131e-356d-bf77-e8db19517b7a", - "hyperparams_to_tune": ['max_iter', 'penalty', 'alpha'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = SGDClassifier( - loss=self.hyperparams['loss'], - penalty=self.hyperparams['penalty'], - 
alpha=self.hyperparams['alpha'], - l1_ratio=self.hyperparams['l1_ratio'], - fit_intercept=self.hyperparams['fit_intercept'], - shuffle=self.hyperparams['shuffle'], - epsilon=self.hyperparams['epsilon'], - n_jobs=self.hyperparams['n_jobs'], - learning_rate=self.hyperparams['learning_rate'], - power_t=self.hyperparams['power_t'], - warm_start=self.hyperparams['warm_start'], - average=self.hyperparams['average'], - eta0=self.hyperparams['eta0'], - max_iter=self.hyperparams['max_iter'], - tol=self.hyperparams['tol'], - class_weight=self.hyperparams['class_weight'], - early_stopping=self.hyperparams['early_stopping'], - validation_fraction=self.hyperparams['validation_fraction'], - n_iter_no_change=self.hyperparams['n_iter_no_change'], - verbose=_verbose, - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def continue_fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._training_inputs is None or self._training_outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.partial_fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - def 
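# A minimal, self-contained sketch of the incremental-training pattern that
# continue_fit() relies on: sklearn's partial_fit must see the full label set
# on its first call; later mini-batches then refine the same weights.
import numpy
from sklearn.linear_model import SGDClassifier

rng = numpy.random.RandomState(0)
X1, y1 = rng.randn(50, 3), rng.randint(0, 2, 50)
X2, y2 = rng.randn(50, 3), rng.randint(0, 2, 50)
clf = SGDClassifier(loss='hinge', random_state=0)
clf.partial_fit(X1, y1, classes=numpy.array([0, 1]))  # first batch declares the classes
clf.partial_fit(X2, y2)                               # later batches continue fitting
print(clf.predict(X2[:3]))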
produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - n_iter_=None, - loss_function_=None, - classes_=None, - _expanded_class_weight=None, - t_=None, - C=None, - average_coef_=None, - average_intercept_=None, - standard_coef_=None, - standard_intercept_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - loss_function_=getattr(self._clf, 'loss_function_', None), - classes_=getattr(self._clf, 'classes_', None), - _expanded_class_weight=getattr(self._clf, '_expanded_class_weight', None), - t_=getattr(self._clf, 't_', None), - C=getattr(self._clf, 'C', None), - average_coef_=getattr(self._clf, 'average_coef_', None), - average_intercept_=getattr(self._clf, 'average_intercept_', None), - standard_coef_=getattr(self._clf, 'standard_coef_', None), - standard_intercept_=getattr(self._clf, 'standard_intercept_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.n_iter_ = params['n_iter_'] - self._clf.loss_function_ = params['loss_function_'] - self._clf.classes_ = params['classes_'] - self._clf._expanded_class_weight = params['_expanded_class_weight'] - self._clf.t_ = params['t_'] - self._clf.C = params['C'] - self._clf.average_coef_ = params['average_coef_'] - self._clf.average_intercept_ = params['average_intercept_'] - self._clf.standard_coef_ = params['standard_coef_'] - self._clf.standard_intercept_ = params['standard_intercept_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = 
params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - if params['loss_function_'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['_expanded_class_weight'] is not None: - self._fitted = True - if params['t_'] is not None: - self._fitted = True - if params['C'] is not None: - self._fitted = True - if params['average_coef_'] is not None: - self._fitted = True - if params['average_intercept_'] is not None: - self._fitted = True - if params['standard_coef_'] is not None: - self._fitted = True - if params['standard_intercept_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - 
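# A minimal sketch of the Params round-trip implemented by get_params() and
# set_params() above, assuming the surrounding module's names: fitted state
# moves between instances without re-training. The toy data is invented.
X = d3m_dataframe({'f0': [0.0, 1.0, 2.0, 3.0]}, generate_metadata=True)
y = d3m_dataframe({'t': [0, 0, 1, 1]}, generate_metadata=True)
trained = SKSGDClassifier(hyperparams=Hyperparams.defaults())
trained.set_training_data(inputs=X, outputs=y)
trained.fit()
state = trained.get_params()    # a pickleable Params mapping (coef_, classes_, ...)
clone = SKSGDClassifier(hyperparams=Hyperparams.defaults())
clone.set_params(params=state)  # restores the estimator and marks it fitted
predictions = clone.produce(inputs=X).value  # no call to clone.fit() needed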
target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKSGDClassifier.__doc__ = SGDClassifier.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKSGDRegressor.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKSGDRegressor.py deleted file mode 100644 index a6361ef..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKSGDRegressor.py +++ /dev/null @@ -1,643 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from 
sklearn.linear_model.stochastic_gradient import SGDRegressor - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - coef_: Optional[ndarray] - intercept_: Optional[ndarray] - average_coef_: Optional[ndarray] - average_intercept_: Optional[ndarray] - t_: Optional[float] - n_iter_: Optional[int] - C: Optional[float] - standard_coef_: Optional[ndarray] - standard_intercept_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - loss = hyperparams.Choice( - choices={ - 'squared_loss': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'huber': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'epsilon': hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'epsilon_insensitive': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'epsilon': hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'squared_epsilon_insensitive': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'epsilon': hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ) - }, - default='squared_loss', - description='The loss function to be used. Defaults to \'squared_loss\' which refers to the ordinary least squares fit. \'huber\' modifies \'squared_loss\' to focus less on getting outliers correct by switching from squared to linear loss past a distance of epsilon. \'epsilon_insensitive\' ignores errors less than epsilon and is linear past that; this is the loss function used in SVR. \'squared_epsilon_insensitive\' is the same but becomes squared loss past a tolerance of epsilon.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - penalty = hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Enumeration[str]( - values=['l1', 'l2', 'elasticnet'], - default='l2', - description='The penalty (aka regularization term) to be used. Defaults to \'l2\' which is the standard regularizer for linear SVM models. 
\'l1\' and \'elasticnet\' might bring sparsity to the model (feature selection) not achievable with \'l2\'.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='str', - description='The penalty (aka regularization term) to be used. Defaults to \'l2\' which is the standard regularizer for linear SVM models. \'l1\' and \'elasticnet\' might bring sparsity to the model (feature selection) not achievable with \'l2\'.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - alpha = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.0001, - description='Constant that multiplies the regularization term. Defaults to 0.0001 Also used to compute learning_rate when set to \'optimal\'. l1_ratio : float The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1. l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1. Defaults to 0.15.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - l1_ratio = hyperparams.Bounded[float]( - lower=0, - upper=1, - default=0.15, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fit_intercept = hyperparams.UniformBool( - default=True, - description='Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. Defaults to True.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=1000, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='int', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.001, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='float', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - shuffle = hyperparams.UniformBool( - default=True, - description='Whether or not the training data should be shuffled after each epoch. Defaults to True.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - learning_rate = hyperparams.Enumeration[str]( - values=['optimal', 'invscaling', 'constant', 'adaptive'], - default='invscaling', - description='The learning rate schedule: - \'constant\': eta = eta0 - \'optimal\': eta = 1.0 / (alpha * (t + t0)) [default] - \'invscaling\': eta = eta0 / pow(t, power_t) where t0 is chosen by a heuristic proposed by Leon Bottou. 
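# A minimal sketch of the shape a Choice hyper-parameter takes when read back:
# the selected branch sits under 'choice' and its nested parameters are read
# with dict-style access, which is how the constructor below unpacks 'loss'.
loss_config = {'choice': 'huber', 'epsilon': 0.05}  # illustrative value only
print(loss_config['choice'])            # 'huber'
print(loss_config.get('epsilon', 0.1))  # 0.05; the default covers 'squared_loss'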
eta0 : double, optional The initial learning rate [default 0.01].', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - eta0 = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.01, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - power_t = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.25, - description='The exponent for inverse scaling learning rate [default 0.25].', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - warm_start = hyperparams.UniformBool( - default=False, - description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - average = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - default=2, - lower=2, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'bool': hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='bool', - description='When set to True, computes the averaged SGD weights and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches average. So ``average=10`` will begin averaging after seeing 10 samples.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - early_stopping = hyperparams.UniformBool( - default=False, - description='Whether to use early stopping to terminate training when validation score is not improving. If set to True, it will automatically set aside a fraction of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - validation_fraction = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=1, - description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - n_iter_no_change = hyperparams.Bounded[int]( - default=5, - lower=0, - upper=None, - description='Number of iterations with no improvement to wait before early stopping.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. 
If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
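# A minimal pandas approximation of what the return_result modes above mean
# for the produced frame; the real logic lives in d3m.base.utils.combine_columns.
import pandas
inputs = pandas.DataFrame({'d3mIndex': [0, 1], 'a': [1.0, 2.0]})
produced = pandas.DataFrame({'a_scaled': [0.1, 0.2]})
new = produced.copy()                                 # 'new': produced columns only
appended = pandas.concat([inputs, produced], axis=1)  # 'append': originals + produced
replaced = inputs.drop(columns=['a']).join(produced)  # 'replace': swap originals out
print(list(replaced.columns))  # ['d3mIndex', 'a_scaled']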
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKSGDRegressor(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams], - ContinueFitMixin[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn SGDRegressor - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.GRADIENT_DESCENT, ], - "name": "sklearn.linear_model.stochastic_gradient.SGDRegressor", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.sgd.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html']}, - "version": "2019.11.13", - "id": "db3a7669-72e1-3c95-91c1-0c2a3f137d78", - "hyperparams_to_tune": ['max_iter', 'penalty', 'alpha'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - self._clf = SGDRegressor( - loss=self.hyperparams['loss']['choice'], - epsilon=self.hyperparams['loss'].get('epsilon', 0.1), - penalty=self.hyperparams['penalty'], - alpha=self.hyperparams['alpha'], - l1_ratio=self.hyperparams['l1_ratio'], - fit_intercept=self.hyperparams['fit_intercept'], - max_iter=self.hyperparams['max_iter'], - tol=self.hyperparams['tol'], - shuffle=self.hyperparams['shuffle'], - learning_rate=self.hyperparams['learning_rate'], - eta0=self.hyperparams['eta0'], - power_t=self.hyperparams['power_t'], - warm_start=self.hyperparams['warm_start'], - average=self.hyperparams['average'], - early_stopping=self.hyperparams['early_stopping'], - validation_fraction=self.hyperparams['validation_fraction'], - n_iter_no_change=self.hyperparams['n_iter_no_change'], - verbose=_verbose, - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if
self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def continue_fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._training_inputs is None or self._training_outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.partial_fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - coef_=None, - intercept_=None, - average_coef_=None, - average_intercept_=None, - t_=None, - n_iter_=None, - C=None, - standard_coef_=None, - standard_intercept_=None, - 
input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - coef_=getattr(self._clf, 'coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - average_coef_=getattr(self._clf, 'average_coef_', None), - average_intercept_=getattr(self._clf, 'average_intercept_', None), - t_=getattr(self._clf, 't_', None), - n_iter_=getattr(self._clf, 'n_iter_', None), - C=getattr(self._clf, 'C', None), - standard_coef_=getattr(self._clf, 'standard_coef_', None), - standard_intercept_=getattr(self._clf, 'standard_intercept_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.coef_ = params['coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf.average_coef_ = params['average_coef_'] - self._clf.average_intercept_ = params['average_intercept_'] - self._clf.t_ = params['t_'] - self._clf.n_iter_ = params['n_iter_'] - self._clf.C = params['C'] - self._clf.standard_coef_ = params['standard_coef_'] - self._clf.standard_intercept_ = params['standard_intercept_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['average_coef_'] is not None: - self._fitted = True - if params['average_intercept_'] is not None: - self._fitted = True - if params['t_'] is not None: - self._fitted = True - if params['n_iter_'] is not None: - self._fitted = True - if params['C'] is not None: - self._fitted = True - if params['standard_coef_'] is not None: - self._fitted = True - if params['standard_intercept_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = 
set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
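A minimal, standalone sketch of the semantic-type rewrite performed below, using plain Python sets in place of the d3m metadata objects (the PREFIX constant and the example type sets are introduced here for illustration only):

PREFIX = "https://metadata.datadrivendiscovery.org/types/"

# Example: semantic types of a training target column before the rewrite.
semantic_types = {PREFIX + "Target", PREFIX + "TrueTarget", PREFIX + "SuggestedTarget"}

semantic_types_to_remove = {PREFIX + "TrueTarget", PREFIX + "SuggestedTarget"}
# return_semantic_type defaults to PredictedTarget, so by default one type is added.
add_semantic_types = {PREFIX + "PredictedTarget"}

semantic_types = (semantic_types - semantic_types_to_remove) | add_semantic_types
print(sorted(semantic_types))  # the column is now marked Target + PredictedTarget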
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKSGDRegressor.__doc__ = SGDRegressor.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKSVC.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKSVC.py deleted file mode 100644 index c8f60e5..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKSVC.py +++ /dev/null @@ -1,635 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.svm.classes import SVC - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class 
Params(params.Params): - support_: Optional[ndarray] - support_vectors_: Optional[ndarray] - n_support_: Optional[ndarray] - dual_coef_: Optional[ndarray] - intercept_: Optional[ndarray] - _sparse: Optional[bool] - shape_fit_: Optional[tuple] - _dual_coef_: Optional[ndarray] - _intercept_: Optional[ndarray] - probA_: Optional[ndarray] - probB_: Optional[ndarray] - _gamma: Optional[float] - classes_: Optional[ndarray] - class_weight_: Optional[ndarray] - fit_status_: Optional[int] - epsilon: Optional[float] - nu: Optional[float] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - C = hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - description='Penalty parameter C of the error term.' - ) - kernel = hyperparams.Choice( - choices={ - 'linear': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'poly': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'degree': hyperparams.Bounded[int]( - default=3, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - description='1/n_features will be used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'coef0': hyperparams.Constant( - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'rbf': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - description='1/n_features will be used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'sigmoid': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - description='1/n_features will be used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'coef0': hyperparams.Constant( - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ) - }, - default='rbf', - description='Specifies the kernel type to be used in the algorithm. 
It must be one of \'linear\', \'poly\', \'rbf\', \'sigmoid\', \'precomputed\' or a callable. If none is given, \'rbf\' will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape ``(n_samples, n_samples)``.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - probability = hyperparams.UniformBool( - default=False, - description='Whether to enable probability estimates. This must be enabled prior to calling `fit`, and will slow down that method.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - shrinking = hyperparams.UniformBool( - default=True, - description='Whether to use the shrinking heuristic.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.001, - lower=0, - upper=None, - description='Tolerance for stopping criterion.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - cache_size = hyperparams.Bounded[float]( - default=200, - lower=0, - upper=None, - description='Specify the size of the kernel cache (in MB).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - class_weight = hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Constant( - default='balanced', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. The "balanced" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_iter = hyperparams.Bounded[int]( - default=-1, - lower=-1, - upper=None, - description='Hard limit on iterations within solver, or -1 for no limit.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - decision_function_shape = hyperparams.Enumeration[str]( - values=['ovr', 'ovo'], - default='ovr', - description='Whether to return a one-vs-rest (\'ovr\') decision function of shape (n_samples, n_classes) as all other classifiers, or the original one-vs-one (\'ovo\') decision function of libsvm which has shape (n_samples, n_classes * (n_classes - 1) / 2). The default of None will currently behave as \'ovo\' for backward compatibility and raise a deprecation warning, but will change \'ovr\' in 0.19. .. versionadded:: 0.17 *decision_function_shape=\'ovr\'* is recommended. .. versionchanged:: 0.17 Deprecated *decision_function_shape=\'ovo\' and None*.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. 
If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking, set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKSVC(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn SVC - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.SUPPORT_VECTOR_MACHINE, ], - "name": "sklearn.svm.classes.SVC", - "primitive_family": metadata_base.PrimitiveFamily.CLASSIFICATION, - "python_path": "d3m.primitives.classification.svc.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html']}, - "version": "2019.11.13", - "id": "0ae7d42d-f765-3348-a28c-57d94880aa6a", - "hyperparams_to_tune": ['C', 'kernel'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = SVC( - C=self.hyperparams['C'], - kernel=self.hyperparams['kernel']['choice'], - degree=self.hyperparams['kernel'].get('degree', 3), - gamma=self.hyperparams['kernel'].get('gamma', 'auto'), - coef0=self.hyperparams['kernel'].get('coef0', 0), - probability=self.hyperparams['probability'], - shrinking=self.hyperparams['shrinking'], - tol=self.hyperparams['tol'], - cache_size=self.hyperparams['cache_size'], - class_weight=self.hyperparams['class_weight'], - max_iter=self.hyperparams['max_iter'], - decision_function_shape=self.hyperparams['decision_function_shape'], - verbose=_verbose, - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs,
self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - support_=None, - support_vectors_=None, - n_support_=None, - dual_coef_=None, - intercept_=None, - _sparse=None, - shape_fit_=None, - _dual_coef_=None, - _intercept_=None, - probA_=None, - probB_=None, - _gamma=None, - classes_=None, - class_weight_=None, - fit_status_=None, - epsilon=None, - nu=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - support_=getattr(self._clf, 'support_', None), - support_vectors_=getattr(self._clf, 'support_vectors_', None), - n_support_=getattr(self._clf, 'n_support_', None), - dual_coef_=getattr(self._clf, 'dual_coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - _sparse=getattr(self._clf, '_sparse', None), - shape_fit_=getattr(self._clf, 'shape_fit_', None), - _dual_coef_=getattr(self._clf, '_dual_coef_', None), - _intercept_=getattr(self._clf, '_intercept_', None), - probA_=getattr(self._clf, 'probA_', None), - probB_=getattr(self._clf, 'probB_', None), - _gamma=getattr(self._clf, '_gamma', None), - classes_=getattr(self._clf, 'classes_', None), - class_weight_=getattr(self._clf, 'class_weight_', None), - fit_status_=getattr(self._clf, 'fit_status_', None), - epsilon=getattr(self._clf, 'epsilon', None), - nu=getattr(self._clf, 'nu', None), - input_column_names=self._input_column_names, - 
training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.support_ = params['support_'] - self._clf.support_vectors_ = params['support_vectors_'] - self._clf.n_support_ = params['n_support_'] - self._clf.dual_coef_ = params['dual_coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf._sparse = params['_sparse'] - self._clf.shape_fit_ = params['shape_fit_'] - self._clf._dual_coef_ = params['_dual_coef_'] - self._clf._intercept_ = params['_intercept_'] - self._clf.probA_ = params['probA_'] - self._clf.probB_ = params['probB_'] - self._clf._gamma = params['_gamma'] - self._clf.classes_ = params['classes_'] - self._clf.class_weight_ = params['class_weight_'] - self._clf.fit_status_ = params['fit_status_'] - self._clf.epsilon = params['epsilon'] - self._clf.nu = params['nu'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['support_'] is not None: - self._fitted = True - if params['support_vectors_'] is not None: - self._fitted = True - if params['n_support_'] is not None: - self._fitted = True - if params['dual_coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['_sparse'] is not None: - self._fitted = True - if params['shape_fit_'] is not None: - self._fitted = True - if params['_dual_coef_'] is not None: - self._fitted = True - if params['_intercept_'] is not None: - self._fitted = True - if params['probA_'] is not None: - self._fitted = True - if params['probB_'] is not None: - self._fitted = True - if params['_gamma'] is not None: - self._fitted = True - if params['classes_'] is not None: - self._fitted = True - if params['class_weight_'] is not None: - self._fitted = True - if params['fit_status_'] is not None: - self._fitted = True - if params['epsilon'] is not None: - self._fitted = True - if params['nu'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - 
semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
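The column filters above (_can_produce_column and the can_produce_column closure in _get_targets) rely on the idiom len(accepted_semantic_types - semantic_types) == 0, which is simply a subset test. A standalone sketch, with hypothetical column type sets invented for illustration:

PREFIX = "https://metadata.datadrivendiscovery.org/types/"

def is_selectable(column_types: set, required: set) -> bool:
    # An empty set difference means every required type is present,
    # i.e. required <= column_types.
    return len(required - column_types) == 0

attribute_column = {PREFIX + "Attribute"}                   # hypothetical input column
target_column = {PREFIX + "Target", PREFIX + "TrueTarget"}  # hypothetical target column

print(is_selectable(attribute_column, {PREFIX + "Attribute"}))  # True: usable as input
print(is_selectable(target_column, {PREFIX + "Attribute"}))     # False: skipped as input
print(is_selectable(target_column, {PREFIX + "TrueTarget"}))    # True: usable as target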
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKSVC.__doc__ = SVC.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKSVR.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKSVR.py deleted file mode 100644 index 8f17ca5..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKSVR.py +++ /dev/null @@ -1,616 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.svm.classes import SVR - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - 
support_: Optional[ndarray] - support_vectors_: Optional[ndarray] - dual_coef_: Optional[ndarray] - intercept_: Optional[ndarray] - _sparse: Optional[bool] - shape_fit_: Optional[tuple] - n_support_: Optional[ndarray] - probA_: Optional[ndarray] - probB_: Optional[ndarray] - _gamma: Optional[float] - _dual_coef_: Optional[ndarray] - _intercept_: Optional[ndarray] - class_weight_: Optional[ndarray] - fit_status_: Optional[int] - class_weight: Optional[Union[str, Dict, List[Dict]]] - nu: Optional[float] - probability: Optional[bool] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - C = hyperparams.Bounded[float]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - description='Penalty parameter C of the error term.' - ) - epsilon = hyperparams.Bounded[float]( - lower=0, - upper=None, - default=0.1, - description='Epsilon in the epsilon-SVR model. It specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - kernel = hyperparams.Choice( - choices={ - 'linear': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ), - 'poly': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'degree': hyperparams.Bounded[int]( - default=3, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - description='1/n_features will be used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'coef0': hyperparams.Constant( - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'rbf': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - description='1/n_features will be used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'sigmoid': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'gamma': hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - description='1/n_features will be used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - 
default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ), - 'coef0': hyperparams.Constant( - default=0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'precomputed': hyperparams.Hyperparams.define( - configuration=OrderedDict({}) - ) - }, - default='rbf', - description='Specifies the kernel type to be used in the algorithm. It must be one of \'linear\', \'poly\', \'rbf\', \'sigmoid\', \'precomputed\' or a callable. If none is given, \'rbf\' will be used. If a callable is given it is used to precompute the kernel matrix.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - shrinking = hyperparams.UniformBool( - default=True, - description='Whether to use the shrinking heuristic.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - tol = hyperparams.Bounded[float]( - default=0.001, - lower=0, - upper=None, - description='Tolerance for stopping criterion.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - cache_size = hyperparams.Bounded[float]( - default=200, - lower=0, - upper=None, - description='Specify the size of the kernel cache (in MB).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter'] - ) - max_iter = hyperparams.Bounded[int]( - default=-1, - lower=-1, - upper=None, - description='Hard limit on iterations within solver, or -1 for no limit.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking, set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKSVR(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn SVR - `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html>`_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.SUPPORT_VECTOR_MACHINE, ], - "name": "sklearn.svm.classes.SVR", - "primitive_family": metadata_base.PrimitiveFamily.REGRESSION, - "python_path": "d3m.primitives.regression.svr.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html']}, - "version": "2019.11.13", - "id": "ebbc3404-902d-33cc-a10c-e42b06dfe60c", - "hyperparams_to_tune": ['C', 'kernel'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = SVR( - C=self.hyperparams['C'], - epsilon=self.hyperparams['epsilon'], - kernel=self.hyperparams['kernel']['choice'], - degree=self.hyperparams['kernel'].get('degree', 3), - gamma=self.hyperparams['kernel'].get('gamma', 'auto'), - coef0=self.hyperparams['kernel'].get('coef0', 0), - shrinking=self.hyperparams['shrinking'], - tol=self.hyperparams['tol'], - cache_size=self.hyperparams['cache_size'], - max_iter=self.hyperparams['max_iter'], - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None -
self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._inputs is None or self._outputs is None: - raise ValueError("Missing training data.") - - if not self._new_training_data: - return CallResult(None) - self._new_training_data = False - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - self._target_columns_metadata = self._get_target_columns_metadata(self._training_outputs.metadata, self.hyperparams) - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.predict(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - # For primitives that allow predicting without fitting like GaussianProcessRegressor - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - output = self._wrap_predictions(inputs, sk_output) - output.columns = self._target_names - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._target_column_indices, - columns_list=output) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - support_=None, - support_vectors_=None, - dual_coef_=None, - intercept_=None, - _sparse=None, - shape_fit_=None, - n_support_=None, - probA_=None, - probB_=None, - _gamma=None, - _dual_coef_=None, - _intercept_=None, - class_weight_=None, - fit_status_=None, - class_weight=None, - nu=None, - probability=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - support_=getattr(self._clf, 'support_', None), - support_vectors_=getattr(self._clf, 
'support_vectors_', None), - dual_coef_=getattr(self._clf, 'dual_coef_', None), - intercept_=getattr(self._clf, 'intercept_', None), - _sparse=getattr(self._clf, '_sparse', None), - shape_fit_=getattr(self._clf, 'shape_fit_', None), - n_support_=getattr(self._clf, 'n_support_', None), - probA_=getattr(self._clf, 'probA_', None), - probB_=getattr(self._clf, 'probB_', None), - _gamma=getattr(self._clf, '_gamma', None), - _dual_coef_=getattr(self._clf, '_dual_coef_', None), - _intercept_=getattr(self._clf, '_intercept_', None), - class_weight_=getattr(self._clf, 'class_weight_', None), - fit_status_=getattr(self._clf, 'fit_status_', None), - class_weight=getattr(self._clf, 'class_weight', None), - nu=getattr(self._clf, 'nu', None), - probability=getattr(self._clf, 'probability', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.support_ = params['support_'] - self._clf.support_vectors_ = params['support_vectors_'] - self._clf.dual_coef_ = params['dual_coef_'] - self._clf.intercept_ = params['intercept_'] - self._clf._sparse = params['_sparse'] - self._clf.shape_fit_ = params['shape_fit_'] - self._clf.n_support_ = params['n_support_'] - self._clf.probA_ = params['probA_'] - self._clf.probB_ = params['probB_'] - self._clf._gamma = params['_gamma'] - self._clf._dual_coef_ = params['_dual_coef_'] - self._clf._intercept_ = params['_intercept_'] - self._clf.class_weight_ = params['class_weight_'] - self._clf.fit_status_ = params['fit_status_'] - self._clf.class_weight = params['class_weight'] - self._clf.nu = params['nu'] - self._clf.probability = params['probability'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['support_'] is not None: - self._fitted = True - if params['support_vectors_'] is not None: - self._fitted = True - if params['dual_coef_'] is not None: - self._fitted = True - if params['intercept_'] is not None: - self._fitted = True - if params['_sparse'] is not None: - self._fitted = True - if params['shape_fit_'] is not None: - self._fitted = True - if params['n_support_'] is not None: - self._fitted = True - if params['probA_'] is not None: - self._fitted = True - if params['probB_'] is not None: - self._fitted = True - if params['_gamma'] is not None: - self._fitted = True - if params['_dual_coef_'] is not None: - self._fitted = True - if params['_intercept_'] is not None: - self._fitted = True - if params['class_weight_'] is not None: - self._fitted = True - if params['fit_status_'] is not None: - self._fitted = True - if params['class_weight'] is not None: - self._fitted = True - if params['nu'] is not None: - self._fitted = True - if params['probability'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - 
columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - return False - - target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(metadata, - use_columns=hyperparams[ - 'use_outputs_columns'], - exclude_columns= - hyperparams[ - 'exclude_outputs_columns'], - can_use_column=can_produce_column) - targets = [] - if target_column_indices: - targets = data.select_columns(target_column_indices) - target_column_names = [] - for idx in target_column_indices: - target_column_names.append(data.columns[idx]) - return targets, target_column_names, target_column_indices - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
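For context on the nested kernel hyperparameter consumed earlier in SKSVR.__init__: the configured Choice value behaves like a mapping with a 'choice' key plus kernel-specific entries, and optional entries are read with .get() so that kernels lacking them fall back to the sklearn defaults. A minimal stand-in using a plain dict (the example values are illustrative):

# A 'poly' kernel configuration as the primitive would see it.
kernel = {"choice": "poly", "degree": 2, "gamma": 0.5, "coef0": 1.0}

svr_kwargs = dict(
    kernel=kernel["choice"],
    degree=kernel.get("degree", 3),     # present only for 'poly'
    gamma=kernel.get("gamma", "auto"),  # absent for 'linear' and 'precomputed'
    coef0=kernel.get("coef0", 0),       # present for 'poly' and 'sigmoid'
)
print(svr_kwargs)  # {'kernel': 'poly', 'degree': 2, 'gamma': 0.5, 'coef0': 1.0}

# An 'rbf' configuration carries only 'gamma'; the fallbacks fill in the rest.
kernel = {"choice": "rbf", "gamma": "auto"}
svr_kwargs = dict(kernel=kernel["choice"], degree=kernel.get("degree", 3),
                  gamma=kernel.get("gamma", "auto"), coef0=kernel.get("coef0", 0))
print(svr_kwargs)  # {'kernel': 'rbf', 'degree': 3, 'gamma': 'auto', 'coef0': 0}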
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set(["https://metadata.datadrivendiscovery.org/types/TrueTarget","https://metadata.datadrivendiscovery.org/types/SuggestedTarget",]) - add_semantic_types = set(["https://metadata.datadrivendiscovery.org/types/PredictedTarget",]) - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, self._target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/PredictedTarget') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKSVR.__doc__ = SVR.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKSelectFwe.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKSelectFwe.py deleted file mode 100644 index b7e534c..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKSelectFwe.py +++ /dev/null @@ -1,428 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.feature_selection.univariate_selection import SelectFwe -from sklearn.feature_selection import f_classif, f_regression, chi2 - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m 
import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - scores_: Optional[ndarray] - pvalues_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - score_func = hyperparams.Enumeration[str]( - default='f_classif', - values=['f_classif', 'f_regression', 'chi2'], - description='Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues). Default is f_classif (see below "See also"). The default function only works with classification tasks.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - alpha = hyperparams.Bounded[float]( - default=0.05, - lower=0, - upper=None, - description='The highest uncorrected p-value for features to keep.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['update_semantic_types', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", -) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. 
Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKSelectFwe(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn SelectFwe - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.FEATURE_SCALING, ], - "name": "sklearn.feature_selection.univariate_selection.SelectFwe", - "primitive_family": metadata_base.PrimitiveFamily.FEATURE_SELECTION, - "python_path": "d3m.primitives.feature_selection.select_fwe.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectFwe.html']}, - "version": "2019.11.13", - "id": "09a4cffa-a59f-30ac-b78f-101c35b3f7c6", - "hyperparams_to_tune": ['alpha'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = SelectFwe( - score_func=eval(self.hyperparams['score_func']), - alpha=self.hyperparams['alpha'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None or self._training_outputs is None: - raise ValueError("Missing training data.") - - if len(self._training_indices) 
> 0 and len(self._target_column_indices) > 0: - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.transform(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - target_columns_metadata = self._copy_columns_metadata(inputs.iloc[:, self._training_indices].metadata, - self.produce_support().value) - output = self._wrap_predictions(inputs, sk_output, target_columns_metadata) - output.columns = [inputs.columns[idx] for idx in range(len(inputs.columns)) if idx in self.produce_support().value] - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - if self.hyperparams['return_result'] == 'update_semantic_types': - temp_inputs = inputs.copy() - columns_not_selected = sorted(set(range(len(temp_inputs.columns))) - set(self.produce_support().value)) - - for idx in columns_not_selected: - temp_inputs.metadata = temp_inputs.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, idx), - 'https://metadata.datadrivendiscovery.org/types/Attribute') - - temp_inputs = temp_inputs.select_columns(self._training_indices) - outputs = base_utils.combine_columns(return_result='replace', - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=[temp_inputs]) - return CallResult(outputs) - - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output) - - return CallResult(outputs) - - def produce_support(self, *, timeout: float = None, iterations: int = None) -> CallResult[Any]: - all_indices = self._training_indices - selected_indices = self._clf.get_support(indices=True).tolist() - indices = [all_indices[index] for index in selected_indices] - return CallResult(indices) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - scores_=None, - pvalues_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - scores_=getattr(self._clf, 'scores_', None), - pvalues_=getattr(self._clf, 'pvalues_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.scores_ = 
params['scores_']
-        self._clf.pvalues_ = params['pvalues_']
-        self._input_column_names = params['input_column_names']
-        self._training_indices = params['training_indices_']
-        self._target_names = params['target_names_']
-        self._target_column_indices = params['target_column_indices_']
-        self._target_columns_metadata = params['target_columns_metadata_']
-
-        if params['scores_'] is not None:
-            self._fitted = True
-        if params['pvalues_'] is not None:
-            self._fitted = True
-
-    @classmethod
-    def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams):
-        if not hyperparams['use_semantic_types']:
-            return inputs, list(range(len(inputs.columns)))
-
-        inputs_metadata = inputs.metadata
-
-        def can_produce_column(column_index: int) -> bool:
-            return cls._can_produce_column(inputs_metadata, column_index, hyperparams)
-
-        columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(
-            inputs_metadata,
-            use_columns=hyperparams['use_inputs_columns'],
-            exclude_columns=hyperparams['exclude_inputs_columns'],
-            can_use_column=can_produce_column)
-        return inputs.iloc[:, columns_to_produce], columns_to_produce
-        # return columns_to_produce
-
-    @classmethod
-    def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool:
-        column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))
-
-        accepted_structural_types = (int, float, numpy.integer, numpy.float64)
-        accepted_semantic_types = set()
-        accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute")
-        if not issubclass(column_metadata['structural_type'], accepted_structural_types):
-            return False
-
-        semantic_types = set(column_metadata.get('semantic_types', []))
-
-        if len(semantic_types) == 0:
-            cls.logger.warning("No semantic types found in column metadata")
-            return False
-        # Making sure all accepted_semantic_types are available in semantic_types
-        if len(accepted_semantic_types - semantic_types) == 0:
-            return True
-
-        return False
-
-    @classmethod
-    def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams):
-        if not hyperparams['use_semantic_types']:
-            return data, list(data.columns), list(range(len(data.columns)))
-
-        metadata = data.metadata
-
-        def can_produce_column(column_index: int) -> bool:
-            accepted_semantic_types = set()
-            accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget")
-            column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index))
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            if len(semantic_types) == 0:
-                cls.logger.warning("No semantic types found in column metadata")
-                return False
-            # Making sure all accepted_semantic_types are available in semantic_types
-            if len(accepted_semantic_types - semantic_types) == 0:
-                return True
-            return False
-
-        target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(
-            metadata,
-            use_columns=hyperparams['use_outputs_columns'],
-            exclude_columns=hyperparams['exclude_outputs_columns'],
-            can_use_column=can_produce_column)
-        targets = []
-        if target_column_indices:
-            targets = data.select_columns(target_column_indices)
-        target_column_names = []
-        for idx in target_column_indices:
-            target_column_names.append(data.columns[idx])
-        return targets, target_column_names, target_column_indices
-
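produce_support() above translates sklearn's selector output back to dataframe positions: get_support(indices=True) indexes into the columns that were handed to fit(), so each index is re-mapped through _training_indices. A minimal sketch of that mapping on a bare sklearn SelectFwe, with hypothetical original column positions (SKSelectPercentile below uses the identical pattern):

import numpy
from sklearn.feature_selection import SelectFwe, f_classif

rng = numpy.random.RandomState(0)
X = rng.rand(80, 6)
y = (X[:, 2] > 0.5).astype(int)  # only column 2 carries signal

sel = SelectFwe(score_func=f_classif, alpha=0.05).fit(X, y)

# Selector indices are relative to the 6 fitted columns...
selected = sel.get_support(indices=True).tolist()

# ...so map them back to positions in the original dataframe, as
# produce_support() does with self._training_indices (hypothetical here).
training_indices = [1, 2, 4, 5, 7, 9]
original_positions = [training_indices[i] for i in selected]
print(selected, original_positions)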
-    @classmethod
-    def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]:
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict(outputs_metadata.query_column(column_index))
-
-            # Update semantic types and prepare it for predicted targets.
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set()
-            add_semantic_types = set()  # must be a set, so that .add() below is valid
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            if len(target_columns_metadata) == 1:
-                name = column_metadata.get("name")
-                for idx in range(len(outputs.columns)):
-                    outputs_metadata = outputs_metadata.update_column(idx, column_metadata)
-                    if len(outputs.columns) > 1:
-                        # Updating column names.
-                        outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, idx), {'name': "{}_{}".format(name, idx)})
-            else:
-                outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)
-
-        return outputs_metadata
-
-    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray, target_columns_metadata) -> Outputs:
-        outputs = d3m_dataframe(predictions, generate_metadata=False)
-        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
-        return outputs
-
-    @classmethod
-    def _copy_columns_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_indices) -> List[OrderedDict]:
-        outputs_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in column_indices:
-            column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name")
-            column_metadata = OrderedDict(inputs_metadata.query_column(column_index))
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set()
-            add_semantic_types = set()
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            column_metadata["name"] = str(column_name)
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-
-SKSelectFwe.__doc__ = SelectFwe.__doc__
\ No newline at end of file
diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKSelectPercentile.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKSelectPercentile.py
deleted file mode 100644
index 05044c1..0000000
--- a/common-primitives/sklearn-wrap/sklearn_wrap/SKSelectPercentile.py
+++ /dev/null
@@ -1,428 +0,0 @@
-from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
-from numpy import ndarray
-from collections import OrderedDict
-from scipy import sparse
-import os
-import sklearn
-import numpy
-import typing
-
-# Custom import commands if any
-from sklearn.feature_selection.univariate_selection
import SelectPercentile -from sklearn.feature_selection import f_classif, f_regression, chi2 - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer - -from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase -from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin -from d3m import exceptions -import pandas - - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - scores_: Optional[ndarray] - pvalues_: Optional[ndarray] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - score_func = hyperparams.Enumeration[str]( - default='f_classif', - values=['f_classif', 'f_regression', 'chi2'], - description='Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) or a single array with scores. Default is f_classif (see below "See also"). The default function only works with classification tasks.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - percentile = hyperparams.Bounded[int]( - default=10, - lower=0, - upper=100, - description='Percent of features to keep.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.", - ) - use_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.", - ) - exclude_inputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.", - ) - exclude_outputs_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['update_semantic_types', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
This hyperparam is ignored if use_semantic_types is set to false.", -) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - default='https://metadata.datadrivendiscovery.org/types/PredictedTarget', - description='Decides what semantic type to attach to generated output', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKSelectPercentile(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn SelectPercentile - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.STATISTICAL_MOMENT_ANALYSIS, ], - "name": "sklearn.feature_selection.univariate_selection.SelectPercentile", - "primitive_family": metadata_base.PrimitiveFamily.FEATURE_SELECTION, - "python_path": "d3m.primitives.feature_selection.select_percentile.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectPercentile.html']}, - "version": "2019.11.13", - "id": "16696c4d-bed9-34a2-b9ae-b882c069512d", - "hyperparams_to_tune": ['percentile'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = SelectPercentile( - score_func=eval(self.hyperparams['score_func']), - percentile=self.hyperparams['percentile'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - self._new_training_data = False - - def set_training_data(self, 
*, inputs: Inputs, outputs: Outputs) -> None: - self._inputs = inputs - self._outputs = outputs - self._fitted = False - self._new_training_data = True - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None or self._training_outputs is None: - raise ValueError("Missing training data.") - - if len(self._training_indices) > 0 and len(self._target_column_indices) > 0: - sk_training_output = self._training_outputs.values - - shape = sk_training_output.shape - if len(shape) == 2 and shape[1] == 1: - sk_training_output = numpy.ravel(sk_training_output) - - self._clf.fit(self._training_inputs, sk_training_output) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.transform(sk_inputs) - except sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - target_columns_metadata = self._copy_columns_metadata(inputs.iloc[:, self._training_indices].metadata, - self.produce_support().value) - output = self._wrap_predictions(inputs, sk_output, target_columns_metadata) - output.columns = [inputs.columns[idx] for idx in range(len(inputs.columns)) if idx in self.produce_support().value] - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - if self.hyperparams['return_result'] == 'update_semantic_types': - temp_inputs = inputs.copy() - columns_not_selected = sorted(set(range(len(temp_inputs.columns))) - set(self.produce_support().value)) - - for idx in columns_not_selected: - temp_inputs.metadata = temp_inputs.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, idx), - 'https://metadata.datadrivendiscovery.org/types/Attribute') - - temp_inputs = temp_inputs.select_columns(self._training_indices) - outputs = base_utils.combine_columns(return_result='replace', - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=[temp_inputs]) - return CallResult(outputs) - - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output) - - return CallResult(outputs) - - def produce_support(self, *, timeout: float = None, iterations: int = None) -> CallResult[Any]: - all_indices = self._training_indices - selected_indices = self._clf.get_support(indices=True).tolist() - indices = [all_indices[index] for index in selected_indices] - return CallResult(indices) - - - def get_params(self) -> Params: - if not self._fitted: 
- return Params( - scores_=None, - pvalues_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - scores_=getattr(self._clf, 'scores_', None), - pvalues_=getattr(self._clf, 'pvalues_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.scores_ = params['scores_'] - self._clf.pvalues_ = params['pvalues_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['scores_'] is not None: - self._fitted = True - if params['pvalues_'] is not None: - self._fitted = True - - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_inputs_columns'], - exclude_columns=hyperparams['exclude_inputs_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), list(range(len(data.columns))) - - metadata = data.metadata - - def can_produce_column(column_index: int) -> bool: - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget") - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - 
semantic_types) == 0:
-                return True
-            return False
-
-        target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(
-            metadata,
-            use_columns=hyperparams['use_outputs_columns'],
-            exclude_columns=hyperparams['exclude_outputs_columns'],
-            can_use_column=can_produce_column)
-        targets = []
-        if target_column_indices:
-            targets = data.select_columns(target_column_indices)
-        target_column_names = []
-        for idx in target_column_indices:
-            target_column_names.append(data.columns[idx])
-        return targets, target_column_names, target_column_indices
-
-    @classmethod
-    def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]:
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict(outputs_metadata.query_column(column_index))
-
-            # Update semantic types and prepare it for predicted targets.
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set()
-            add_semantic_types = set()  # must be a set, so that .add() below is valid
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            if len(target_columns_metadata) == 1:
-                name = column_metadata.get("name")
-                for idx in range(len(outputs.columns)):
-                    outputs_metadata = outputs_metadata.update_column(idx, column_metadata)
-                    if len(outputs.columns) > 1:
-                        # Updating column names.
- outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, idx), {'name': "{}_{}".format(name, idx)}) - else: - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray, target_columns_metadata) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - - @classmethod - def _copy_columns_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_indices) -> List[OrderedDict]: - outputs_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in column_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKSelectPercentile.__doc__ = SelectPercentile.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKSparseRandomProjection.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKSparseRandomProjection.py deleted file mode 100644 index 351f4d8..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKSparseRandomProjection.py +++ /dev/null @@ -1,375 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.random_projection import SparseRandomProjection - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - n_component_: Optional[int] - components_: Optional[Union[ndarray, sparse.spmatrix]] - density_: Optional[float] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_components = hyperparams.Union( - configuration=OrderedDict({ - 'int': hyperparams.Bounded[int]( - lower=0, - upper=None, - default=100, - description='Number of components to keep.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) 
- }), - default='auto', - description='Dimensionality of the target projection space. n_components can be automatically adjusted according to the number of samples in the dataset and the bound given by the Johnson-Lindenstrauss lemma. In that case the quality of the embedding is controlled by the ``eps`` parameter. It should be noted that Johnson-Lindenstrauss lemma can yield very conservative estimated of the required number of components as it makes no assumption on the structure of the dataset.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - density = hyperparams.Union( - configuration=OrderedDict({ - 'float': hyperparams.Uniform( - lower=0, - upper=1, - default=0.3, - description='Number of components to keep.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'auto': hyperparams.Constant( - default='auto', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='auto', - description='Ratio of non-zero component in the random projection matrix. If density = \'auto\', the value is set to the minimum density as recommended by Ping Li et al.: 1 / sqrt(n_features). Use density = 1 / 3.0 if you want to reproduce the results from Achlioptas, 2001.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - eps = hyperparams.Bounded[float]( - default=0.1, - lower=0, - upper=1, - description='Parameter to control the quality of the embedding according to the Johnson-Lindenstrauss lemma when n_components is set to \'auto\'. Smaller values lead to better embedding and higher number of dimensions (n_components) in the target projection space.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - dense_output = hyperparams.UniformBool( - default=False, - description='If True, ensure that the output of the random projection is a dense numpy array even if the input and random projection matrix are both sparse. In practice, if the number of components is small the number of zero components in the projected data will be very small and it will be more CPU and memory efficient to use a dense representation. If False, the projected data uses a sparse representation if the input is sparse.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKSparseRandomProjection(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn SparseRandomProjection - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.RANDOM_PROJECTION, ], - "name": "sklearn.random_projection.SparseRandomProjection", - "primitive_family": metadata_base.PrimitiveFamily.DATA_TRANSFORMATION, - "python_path": "d3m.primitives.data_transformation.sparse_random_projection.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.random_projection.SparseRandomProjection.html']}, - "version": "2019.11.13", - "id": "43ddd6be-bb4f-3fd0-8765-df961c16d7dc", - "hyperparams_to_tune": ['n_components'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = SparseRandomProjection( - n_components=self.hyperparams['n_components'], - density=self.hyperparams['density'], - eps=self.hyperparams['eps'], - dense_output=self.hyperparams['dense_output'], - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def 
set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - n_component_=None, - components_=None, - density_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - n_component_=getattr(self._clf, 'n_component_', None), - components_=getattr(self._clf, 'components_', None), - density_=getattr(self._clf, 'density_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.n_component_ = params['n_component_'] - self._clf.components_ = params['components_'] - self._clf.density_ = params['density_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['n_component_'] is not None: - self._fitted = True - if params['components_'] is not None: - self._fitted = True - if params['density_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - 
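The n_components='auto' and density='auto' hyperparams above defer to the Johnson-Lindenstrauss bound and to Ping Li's 1/sqrt(n_features) rule, respectively. A hedged sketch on plain sklearn showing what 'auto' would choose and the densify step that produce() applies (sizes are illustrative):

import numpy
from scipy import sparse
from sklearn.random_projection import (SparseRandomProjection,
                                       johnson_lindenstrauss_min_dim)

rng = numpy.random.RandomState(0)
X = rng.rand(100, 1000)

# What n_components='auto' would pick for a given eps (JL bound);
# it depends only on the number of samples, not on n_features.
print(johnson_lindenstrauss_min_dim(n_samples=100, eps=0.1))

# An explicit target dimension; density='auto' yields 1/sqrt(n_features).
srp = SparseRandomProjection(n_components=50, density='auto',
                             dense_output=False, random_state=0)
Xt = srp.fit_transform(X)

# Mirror of produce() above: densify sparse output before wrapping it.
if sparse.issparse(Xt):
    Xt = Xt.toarray()
assert Xt.shape == (100, 50)
print(srp.density_)  # fitted density, one of the snapshotted Params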
def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
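_can_produce_column above encodes "every accepted semantic type is present" as an empty set difference. In miniature, with the real URIs and hypothetical column type lists:

ACCEPTED = {"https://metadata.datadrivendiscovery.org/types/Attribute"}

def can_produce(semantic_types) -> bool:
    # accepted is a subset of actual, written as "nothing accepted is missing".
    return len(ACCEPTED - set(semantic_types)) == 0

assert not can_produce([])  # the real code also warns and returns False here
assert can_produce(["https://metadata.datadrivendiscovery.org/types/Attribute",
                    "https://metadata.datadrivendiscovery.org/types/TrueTarget"])

The loop below applies the same set arithmetic in the other direction, attaching return_semantic_type to each output column.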
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set()
-            add_semantic_types = set()  # must be a set, so that .add() below is valid
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            column_metadata.pop("structural_type", None)
-            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)
-
-        return outputs_metadata
-
-    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
-        outputs = d3m_dataframe(predictions, generate_metadata=True)
-        target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams)
-        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
-        return outputs
-
-    @classmethod
-    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams):
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_name = "output_{}".format(column_index)
-            column_metadata = OrderedDict()
-            semantic_types = set()
-            semantic_types.add(hyperparams["return_semantic_type"])
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            column_metadata["name"] = str(column_name)
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-
-SKSparseRandomProjection.__doc__ = SparseRandomProjection.__doc__
\ No newline at end of file
diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKStandardScaler.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKStandardScaler.py
deleted file mode 100644
index f8491bb..0000000
--- a/common-primitives/sklearn-wrap/sklearn_wrap/SKStandardScaler.py
+++ /dev/null
@@ -1,357 +0,0 @@
-from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
-from numpy import ndarray
-from collections import OrderedDict
-from scipy import sparse
-import os
-import sklearn
-import numpy
-import typing
-
-# Custom import commands if any
-from sklearn.preprocessing.data import StandardScaler
-
-
-from d3m.container.numpy import ndarray as d3m_ndarray
-from d3m.container import DataFrame as d3m_dataframe
-from d3m.metadata import hyperparams, params, base as metadata_base
-from d3m import utils
-from d3m.base import utils as base_utils
-from d3m.exceptions import PrimitiveNotFittedError
-from d3m.primitive_interfaces.base import CallResult, DockerContainer
-from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase
-
-
-Inputs = d3m_dataframe
-Outputs = d3m_dataframe
-
-
-class Params(params.Params):
-    scale_: Optional[ndarray]
-    mean_: Optional[ndarray]
-    var_: Optional[ndarray]
-    n_samples_seen_: Optional[Union[int, numpy.integer]]
-    input_column_names: Optional[Any]
-    target_names_: Optional[Sequence[Any]]
-    training_indices_: Optional[Sequence[int]]
-    target_column_indices_:
Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - with_mean = hyperparams.UniformBool( - default=True, - description='If True, center the data before scaling. This does not work (and will raise an exception) when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - with_std = hyperparams.UniformBool( - default=True, - description='If True, scale the data to unit variance (or equivalently, unit standard deviation).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKStandardScaler(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn StandardScaler - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.FEATURE_SCALING, ], - "name": "sklearn.preprocessing.data.StandardScaler", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.standard_scaler.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html']}, - "version": "2019.11.13", - "id": "d639947e-ece0-3a39-a666-e974acf4521d", - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = StandardScaler( - with_mean=self.hyperparams['with_mean'], - with_std=self.hyperparams['with_std'], - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs = inputs.iloc[:, self._training_indices] - output_columns = [] - if len(self._training_indices) > 0: - sk_output = self._clf.transform(sk_inputs) - if sparse.issparse(sk_output): - 
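# The branch below densifies scipy sparse output with toarray() before wrapping,
# since the d3m DataFrame container and its generated metadata work on dense
# columns; sklearn's StandardScaler returns sparse output when it is fed a sparse
# matrix with with_mean=False.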
sk_output = sk_output.toarray() - outputs = self._wrap_predictions(inputs, sk_output) - if len(outputs.columns) == len(self._input_column_names): - outputs.columns = self._input_column_names - output_columns = [outputs] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=output_columns) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - scale_=None, - mean_=None, - var_=None, - n_samples_seen_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - scale_=getattr(self._clf, 'scale_', None), - mean_=getattr(self._clf, 'mean_', None), - var_=getattr(self._clf, 'var_', None), - n_samples_seen_=getattr(self._clf, 'n_samples_seen_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.scale_ = params['scale_'] - self._clf.mean_ = params['mean_'] - self._clf.var_ = params['var_'] - self._clf.n_samples_seen_ = params['n_samples_seen_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['scale_'] is not None: - self._fitted = True - if params['mean_'] is not None: - self._fitted = True - if params['var_'] is not None: - self._fitted = True - if params['n_samples_seen_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (int, float, numpy.integer, numpy.float64) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - 
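# Below, a column with no semantic types at all is skipped with a warning;
# otherwise the rule is a subset test: every entry of accepted_semantic_types must
# appear among the column's own semantic types. A standalone illustration of the
# same test, with hypothetical values:
#
#     accepted = {"https://metadata.datadrivendiscovery.org/types/Attribute"}
#     column_types = {"https://metadata.datadrivendiscovery.org/types/Attribute",
#                     "http://schema.org/Float"}
#     len(accepted - column_types) == 0   # True, so the column is used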
cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata, self.hyperparams) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _copy_inputs_metadata(cls, inputs_metadata: metadata_base.DataMetadata, input_indices: List[int], - outputs_metadata: metadata_base.DataMetadata, hyperparams): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - target_columns_metadata: List[OrderedDict] = [] - for column_index in input_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - # If outputs has more columns than index, add Attribute Type to all remaining - if outputs_length > len(input_indices): - for column_index in range(len(input_indices), outputs_length): - column_metadata = OrderedDict() - semantic_types = set() - semantic_types.add(hyperparams["return_semantic_type"]) - column_name = "output_{}".format(column_index) -
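# This padding branch covers widening transforms, where the output has more
# columns than were copied from the input (StandardScaler itself is one-to-one):
# each extra column gets fresh metadata holding only the configured
# return_semantic_type and a synthetic "output_<index>" name.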
column_metadata["semantic_types"] = list(semantic_types) - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKStandardScaler.__doc__ = StandardScaler.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKStringImputer.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKStringImputer.py deleted file mode 100644 index 6e0c125..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKStringImputer.py +++ /dev/null @@ -1,371 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.impute import SimpleImputer -from sklearn.impute._base import _get_mask - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - statistics_: Optional[ndarray] - indicator_: Optional[sklearn.base.BaseEstimator] - input_column_names: Optional[Any] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - missing_values = hyperparams.Hyperparameter[str]( - default='', - description='The placeholder for the missing values. All occurrences of `missing_values` will be imputed.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - add_indicator = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - strategy = hyperparams.Enumeration[str]( - default='most_frequent', - values=['most_frequent', 'constant'], - description='The imputation strategy. - If "mean", then replace missing values using the mean along each column. Can only be used with numeric data. - If "median", then replace missing values using the median along each column. Can only be used with numeric data. - If "most_frequent", then replace missing using the most frequent value along each column. Can be used with strings or numeric data. - If "constant", then replace missing values with fill_value. Can be used with strings or numeric data. .. versionadded:: 0.20 strategy="constant" for fixed value imputation.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - fill_value = hyperparams.Hyperparameter[str]( - default='', - description='When strategy == "constant", fill_value is used to replace all occurrences of missing_values. 
If left to the default, fill_value will be 0 when imputing numerical data and "missing_value" for strings or object data types.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. 
To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKStringImputer(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn SimpleImputer - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.IMPUTATION, ], - "name": "sklearn.impute.SimpleImputer", - "primitive_family": metadata_base.PrimitiveFamily.DATA_CLEANING, - "python_path": "d3m.primitives.data_cleaning.string_imputer.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html']}, - "version": "2019.11.13", - "id": "caeed986-cd1b-303b-900f-868dfc665341", - "hyperparams_to_tune": ['strategy'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None, - _verbose: int = 0) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = SimpleImputer( - missing_values=self.hyperparams['missing_values'], - add_indicator=self.hyperparams['add_indicator'], - strategy=self.hyperparams['strategy'], - fill_value=self.hyperparams['fill_value'], - verbose=_verbose - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices, _ = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - sk_inputs, columns_to_use, _ = self._get_columns_to_fit(inputs, self.hyperparams) - output = [] - if len(sk_inputs.columns): - try: - sk_output = self._clf.transform(sk_inputs) - except 
sklearn.exceptions.NotFittedError as error: - raise PrimitiveNotFittedError("Primitive not fitted.") from error - if sparse.issparse(sk_output): - sk_output = sk_output.toarray() - target_columns_metadata = self._copy_columns_metadata(inputs.metadata, self._training_indices, self.hyperparams) - output = self._wrap_predictions(inputs, sk_output, target_columns_metadata) - - output.columns = [inputs.columns[idx] for idx in range(len(inputs.columns)) if idx in self._training_indices] - output = [output] - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - _, _, dropped_cols = self._get_columns_to_fit(inputs, self.hyperparams) - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices + dropped_cols, - columns_list=output) - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - statistics_=None, - indicator_=None, - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - return Params( - statistics_=getattr(self._clf, 'statistics_', None), - indicator_=getattr(self._clf, 'indicator_', None), - input_column_names=self._input_column_names, - training_indices_=self._training_indices, - target_names_=self._target_names, - target_column_indices_=self._target_column_indices, - target_columns_metadata_=self._target_columns_metadata - ) - - def set_params(self, *, params: Params) -> None: - self._clf.statistics_ = params['statistics_'] - self._clf.indicator_ = params['indicator_'] - self._input_column_names = params['input_column_names'] - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._target_column_indices = params['target_column_indices_'] - self._target_columns_metadata = params['target_columns_metadata_'] - - if params['statistics_'] is not None: - self._fitted = True - if params['indicator_'] is not None: - self._fitted = True - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - - if not hyperparams['use_semantic_types']: - columns_to_produce = list(range(len(inputs.columns))) - - else: - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - - columns_to_drop = cls._get_columns_to_drop(inputs, columns_to_produce, hyperparams) - for col in columns_to_drop: - columns_to_produce.remove(col) - - return inputs.iloc[:, columns_to_produce], columns_to_produce, columns_to_drop - - @classmethod - def _get_columns_to_drop(cls, inputs: Inputs, column_indices: List[int], hyperparams: Hyperparams): - """ - Check for columns that contain missing_values that need to be imputed - If strategy is constant and missing_values is NaN, then all-NaN columns will not be dropped - :param inputs: - :param column_indices: - :return: - """ - columns_to_remove = [] - if hyperparams['strategy'] !=
"constant": - for _, col in enumerate(column_indices): - inp = inputs.iloc[:, [col]].values - mask = _get_mask(inp, hyperparams['missing_values']) - if mask.all(): - columns_to_remove.append(col) - return columns_to_remove - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (str,) - accepted_semantic_types = set() - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray, target_columns_metadata) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - - @classmethod - def _copy_columns_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_indices, hyperparams) -> List[OrderedDict]: - outputs_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in column_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types -
semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKStringImputer.__doc__ = SimpleImputer.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKTfidfVectorizer.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKTfidfVectorizer.py deleted file mode 100644 index 99cd7da..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKTfidfVectorizer.py +++ /dev/null @@ -1,530 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.feature_extraction.text import TfidfVectorizer - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase -from d3m.metadata.base import ALL_ELEMENTS - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - vocabulary_: Optional[Sequence[dict]] - stop_words_: Optional[Sequence[set]] - _tfidf: Optional[Sequence[object]] - fixed_vocabulary_: Optional[Sequence[bool]] - _stop_words_id: Optional[Sequence[int]] - target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - - -class Hyperparams(hyperparams.Hyperparams): - strip_accents = hyperparams.Union( - configuration=OrderedDict({ - 'accents': hyperparams.Enumeration[str]( - default='ascii', - values=['ascii', 'unicode'], - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Remove accents during the preprocessing step. \'ascii\' is a fast method that only works on characters that have an direct ASCII mapping. \'unicode\' is a slightly slower method that works on any characters. None (default) does nothing.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - analyzer = hyperparams.Enumeration[str]( - default='word', - values=['word', 'char', 'char_wb'], - description='Whether the feature should be made of word or character n-grams. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - ngram_range = hyperparams.SortedList( - elements=hyperparams.Bounded[int](1, None, 1), - default=(1, 1), - min_size=2, - max_size=2, - description='The lower and upper boundary of the range of n-values for different n-grams to be extracted. 
All values of n such that min_n <= n <= max_n will be used.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - stop_words = hyperparams.Union( - configuration=OrderedDict({ - 'string': hyperparams.Hyperparameter[str]( - default='english', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'list': hyperparams.List( - elements=hyperparams.Hyperparameter[str](''), - default=[], - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='If a string, it is passed to _check_stop_list and the appropriate stop list is returned. \'english\' is currently the only supported string value. If a list, that list is assumed to contain stop words, all of which will be removed from the resulting tokens. Only applies if ``analyzer == \'word\'``. If None, no stop words will be used. max_df can be set to a value in the range [0.7, 1.0) to automatically detect and filter stop words based on intra corpus document frequency of terms.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - lowercase = hyperparams.UniformBool( - default=True, - description='Convert all characters to lowercase before tokenizing.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - token_pattern = hyperparams.Hyperparameter[str]( - default='(?u)\\b\w\w+\\b', - description='Regular expression denoting what constitutes a "token", only used if ``analyzer == \'word\'``. The default regexp selects tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_df = hyperparams.Union( - configuration=OrderedDict({ - 'proportion': hyperparams.Bounded[float]( - default=1.0, - lower=0.0, - upper=1.0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='proportion', - description='When building the vocabulary ignore terms that have a document frequency strictly higher than the given threshold (corpus-specific stop words). If float, the parameter represents a proportion of documents, integer absolute counts. This parameter is ignored if vocabulary is not None.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - min_df = hyperparams.Union( - configuration=OrderedDict({ - 'proportion': hyperparams.Bounded[float]( - default=1.0, - lower=0.0, - upper=1.0, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='absolute', - description='When building the vocabulary ignore terms that have a document frequency strictly lower than the given threshold. This value is also called cut-off in the literature. If float, the parameter represents a proportion of documents, integer absolute counts. 
This parameter is ignored if vocabulary is not None.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - max_features = hyperparams.Union( - configuration=OrderedDict({ - 'absolute': hyperparams.Bounded[int]( - default=1, - lower=0, - upper=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='If not None, build a vocabulary that only consider the top max_features ordered by term frequency across the corpus. This parameter is ignored if vocabulary is not None.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - binary = hyperparams.UniformBool( - default=False, - description='If True, all non-zero term counts are set to 1. This does not mean outputs will have only 0/1 values, only that the tf term in tf-idf is binary. (Set idf and normalization to False to get 0/1 outputs.)', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - norm = hyperparams.Union( - configuration=OrderedDict({ - 'str': hyperparams.Enumeration[str]( - default='l2', - values=['l1', 'l2'], - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ), - 'none': hyperparams.Constant( - default=None, - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'], - ) - }), - default='none', - description='Norm used to normalize term vectors. None for no normalization.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - use_idf = hyperparams.UniformBool( - default=True, - description='Enable inverse-document-frequency reweighting.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - smooth_idf = hyperparams.UniformBool( - default=True, - description='Smooth idf weights by adding one to document frequencies, as if an extra document was seen containing every term in the collection exactly once. Prevents zero divisions.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - sublinear_tf = hyperparams.UniformBool( - default=False, - description='Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? 
This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - - -class SKTfidfVectorizer(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn TfidfVectorizer - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.MINIMUM_REDUNDANCY_FEATURE_SELECTION, ], - "name": "sklearn.feature_extraction.text.TfidfVectorizer", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.tfidf_vectorizer.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.TfidfVectorizer.html']}, - "version": "2019.11.13", - "id": "1f7ce2c7-1ec8-3483-9a65-eedd4b5811d6", - "hyperparams_to_tune": ['max_df', 'min_df'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # True - - self._clf = list() - - self._training_inputs = None - self._target_names = None - self._training_indices = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - - if self._training_inputs is None: - raise ValueError("Missing training data.") - - if len(self._training_indices) > 0: - for column_index in range(len(self._training_inputs.columns)): - clf = self._create_new_sklearn_estimator() - clf.fit(self._training_inputs.iloc[:, column_index]) - self._clf.append(clf) - - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - return CallResult(None) - - 
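fit() above keeps one fitted TfidfVectorizer per selected text column, because TfidfVectorizer consumes a single iterable of documents rather than a 2-D table. A minimal standalone sketch of that per-column pattern in plain pandas and scikit-learn; the DataFrame and variable names are illustrative only, not part of this wrapper:

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

df = pd.DataFrame({"title": ["red fox", "lazy dog"],
                   "body": ["the fox jumps", "the dog sleeps"]})

vectorizers = {}
for column in df.columns:
    vec = TfidfVectorizer()   # one estimator per text column
    vec.fit(df[column])       # a Series is an iterable of documents
    vectorizers[column] = vec

# Mirrors produce() below: transform column by column, densify, then combine.
features = {name: vec.transform(df[name]).toarray()
            for name, vec in vectorizers.items()}

This one-estimator-per-column layout is also why produce() below rejects inputs whose column count differs from the training data.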
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = inputs - if self.hyperparams['use_semantic_types']: - sk_inputs, training_indices = self._get_columns_to_fit(inputs, self.hyperparams) - else: - training_indices = list(range(len(inputs))) - - # Iterating over all estimators and call transform on them. - # No. of estimators should be equal to the number of columns in the input - if len(self._clf) != len(sk_inputs.columns): - raise RuntimeError("Input data does not have the same number of columns as training data") - outputs = [] - if len(self._training_indices) > 0: - for column_index in range(len(sk_inputs.columns)): - clf = self._clf[column_index] - output = clf.transform(sk_inputs.iloc[:, column_index]) - column_name = sk_inputs.columns[column_index] - - if sparse.issparse(output): - output = output.toarray() - output = self._wrap_predictions(inputs, output) - - # Updating column names. - output.columns = map(lambda x: "{}_{}".format(column_name, x), clf.get_feature_names()) - for i, name in enumerate(clf.get_feature_names()): - output.metadata = output.metadata.update((ALL_ELEMENTS, i), {'name': name}) - - outputs.append(output) - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - - outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'], - add_index_columns=self.hyperparams['add_index_columns'], - inputs=inputs, column_indices=self._training_indices, - columns_list=outputs) - - return CallResult(outputs) - - - def get_params(self) -> Params: - if not self._fitted: - return Params( - vocabulary_=None, - stop_words_=None, - _tfidf=None, - fixed_vocabulary_=None, - _stop_words_id=None, - training_indices_=self._training_indices, - target_names_=self._target_names - ) - - return Params( - vocabulary_=list(map(lambda clf: getattr(clf, 'vocabulary_', None), self._clf)), - stop_words_=list(map(lambda clf: getattr(clf, 'stop_words_', None), self._clf)), - _tfidf=list(map(lambda clf: getattr(clf, '_tfidf', None), self._clf)), - fixed_vocabulary_=list(map(lambda clf: getattr(clf, 'fixed_vocabulary_', None), self._clf)), - _stop_words_id=list(map(lambda clf: getattr(clf, '_stop_words_id', None), self._clf)), - training_indices_=self._training_indices, - target_names_=self._target_names - ) - - def set_params(self, *, params: Params) -> None: - for param, val in params.items(): - if val is not None and param not in ['target_names_', 'training_indices_']: - self._clf = list(map(lambda x: self._create_new_sklearn_estimator(), val)) - break - for index in range(len(self._clf)): - for param, val in params.items(): - if val is not None: - setattr(self._clf[index], param, val[index]) - else: - setattr(self._clf[index], param, None) - self._training_indices = params['training_indices_'] - self._target_names = params['target_names_'] - self._fitted = False - - if params['vocabulary_'] is not None: - self._fitted = True - if params['stop_words_'] is not None: - self._fitted = True - if params['_tfidf'] is not None: - self._fitted = True - if params['fixed_vocabulary_'] is not None: - self._fitted = True - if params['_stop_words_id'] is not None: - self._fitted = True - - def _create_new_sklearn_estimator(self): - clf = TfidfVectorizer( - strip_accents=self.hyperparams['strip_accents'], - 
analyzer=self.hyperparams['analyzer'], - ngram_range=self.hyperparams['ngram_range'], - stop_words=self.hyperparams['stop_words'], - lowercase=self.hyperparams['lowercase'], - token_pattern=self.hyperparams['token_pattern'], - max_df=self.hyperparams['max_df'], - min_df=self.hyperparams['min_df'], - max_features=self.hyperparams['max_features'], - binary=self.hyperparams['binary'], - norm=self.hyperparams['norm'], - use_idf=self.hyperparams['use_idf'], - smooth_idf=self.hyperparams['smooth_idf'], - sublinear_tf=self.hyperparams['sublinear_tf'], - ) - return clf - - - - - - - @classmethod - def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return inputs, list(range(len(inputs.columns))) - - inputs_metadata = inputs.metadata - - def can_produce_column(column_index: int) -> bool: - return cls._can_produce_column(inputs_metadata, column_index, hyperparams) - - columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata, - use_columns=hyperparams['use_columns'], - exclude_columns=hyperparams['exclude_columns'], - can_use_column=can_produce_column) - return inputs.iloc[:, columns_to_produce], columns_to_produce - # return columns_to_produce - - @classmethod - def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool: - column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - - accepted_structural_types = (str,) - accepted_semantic_types = set(["http://schema.org/Text",]) - accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute") - if not issubclass(column_metadata['structural_type'], accepted_structural_types): - return False - - semantic_types = set(column_metadata.get('semantic_types', [])) - - if len(semantic_types) == 0: - cls.logger.warning("No semantic types found in column metadata") - return False - - # Making sure all accepted_semantic_types are available in semantic_types - if len(accepted_semantic_types - semantic_types) == 0: - return True - - return False - - @classmethod - def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams): - if not hyperparams['use_semantic_types']: - return data, list(data.columns), [] - target_names = [] - target_semantic_type = [] - target_column_indices = [] - metadata = data.metadata - target_column_indices.extend(metadata.get_columns_with_semantic_type('https://metadata.datadrivendiscovery.org/types/TrueTarget')) - - for column_index in target_column_indices: - if column_index is metadata_base.ALL_ELEMENTS: - continue - column_index = typing.cast(metadata_base.SimpleSelectorSegment, column_index) - column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index)) - target_names.append(column_metadata.get('name', str(column_index))) - target_semantic_type.append(column_metadata.get('semantic_types', [])) - - targets = data.iloc[:, target_column_indices] - return targets, target_names, target_semantic_type - - @classmethod - def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]: - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict(outputs_metadata.query_column(column_index)) - - # Update semantic types and prepare it for predicted targets. 
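# Note: this helper reads hyperparams["return_semantic_type"] just below, but this
# primitive's Hyperparams class defines no such hyperparameter, and nothing else in
# this file calls _get_target_columns_metadata; it appears to be unused template code.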
- semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = set() - add_semantic_types.add(hyperparams["return_semantic_type"]) - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - @classmethod - def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs], - target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata: - outputs_metadata = metadata_base.DataMetadata().generate(value=outputs) - - for column_index, column_metadata in enumerate(target_columns_metadata): - column_metadata.pop("structural_type", None) - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=True) - target_columns_metadata = self._add_target_columns_metadata(outputs.metadata) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - @classmethod - def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata): - outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in range(outputs_length): - column_metadata = OrderedDict() - semantic_types = [] - semantic_types.append('https://metadata.datadrivendiscovery.org/types/Attribute') - column_name = outputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - if column_name is None: - column_name = "output_{}".format(column_index) - column_metadata["semantic_types"] = semantic_types - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKTfidfVectorizer.__doc__ = TfidfVectorizer.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKTruncatedSVD.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKTruncatedSVD.py deleted file mode 100644 index 2591180..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/SKTruncatedSVD.py +++ /dev/null @@ -1,369 +0,0 @@ -from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple -from numpy import ndarray -from collections import OrderedDict -from scipy import sparse -import os -import sklearn -import numpy -import typing - -# Custom import commands if any -from sklearn.decomposition.truncated_svd import TruncatedSVD - - -from d3m.container.numpy import ndarray as d3m_ndarray -from d3m.container import DataFrame as d3m_dataframe -from d3m.metadata import hyperparams, params, base as metadata_base -from d3m import utils -from d3m.base import utils as base_utils -from d3m.exceptions import PrimitiveNotFittedError -from d3m.primitive_interfaces.base import CallResult, DockerContainer -from d3m.primitive_interfaces.unsupervised_learning import UnsupervisedLearnerPrimitiveBase - - -Inputs = d3m_dataframe -Outputs = d3m_dataframe - - -class Params(params.Params): - components_: Optional[ndarray] - explained_variance_ratio_: Optional[ndarray] - explained_variance_: Optional[ndarray] - singular_values_: Optional[ndarray] - input_column_names: Optional[Any]
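# The ndarray fields above mirror sklearn TruncatedSVD's fitted attributes; together
# with the bookkeeping fields below they let a fitted primitive be checkpointed and
# restored. A hypothetical round-trip (names illustrative only):
#
#     saved = svd_primitive.get_params()      # captures components_, singular_values_, ...
#     fresh = SKTruncatedSVD(hyperparams=hp)
#     fresh.set_params(params=saved)          # restores state and re-marks it as fitted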
- target_names_: Optional[Sequence[Any]] - training_indices_: Optional[Sequence[int]] - target_column_indices_: Optional[Sequence[int]] - target_columns_metadata_: Optional[List[OrderedDict]] - - - -class Hyperparams(hyperparams.Hyperparams): - n_components = hyperparams.Bounded[int]( - default=2, - lower=0, - upper=None, - description='Desired dimensionality of output data. Must be strictly less than the number of features. The default value is useful for visualisation. For LSA, a value of 100 is recommended.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - algorithm = hyperparams.Choice( - choices={ - 'randomized': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'n_iter': hyperparams.Bounded[int]( - default=5, - lower=0, - upper=None, - description='Number of iterations for randomized SVD solver. Not used in arpack', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ), - 'arpack': hyperparams.Hyperparams.define( - configuration=OrderedDict({ - 'tol': hyperparams.Bounded[float]( - default=0, - lower=0, - upper=None, - description='Tolerance for ARPACK. 0 means machine precision. Ignored by randomized SVD solver.', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - }) - ) - }, - default='randomized', - description='SVD solver to use. Either "arpack" for the ARPACK wrapper in SciPy (scipy.sparse.linalg.svds), or "randomized" for the randomized algorithm due to Halko (2009).', - semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'] - ) - - use_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.", - ) - exclude_columns = hyperparams.Set( - elements=hyperparams.Hyperparameter[int](-1), - default=(), - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.", - ) - return_result = hyperparams.Enumeration( - values=['append', 'replace', 'new'], - default='new', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.", - ) - use_semantic_types = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe" - ) - add_index_columns = hyperparams.UniformBool( - default=False, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".", - ) - error_on_no_input = hyperparams.UniformBool( - default=True, - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'], - description="Throw an exception if no input column is selected/provided. 
Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.", - ) - - return_semantic_type = hyperparams.Enumeration[str]( - values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'], - default='https://metadata.datadrivendiscovery.org/types/Attribute', - description='Decides what semantic type to attach to generated attributes', - semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'] - ) - -class SKTruncatedSVD(UnsupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]): - """ - Primitive wrapping for sklearn TruncatedSVD - `sklearn documentation `_ - - """ - - __author__ = "JPL MARVIN" - metadata = metadata_base.PrimitiveMetadata({ - "algorithm_types": [metadata_base.PrimitiveAlgorithmType.SINGULAR_VALUE_DECOMPOSITION, ], - "name": "sklearn.decomposition.truncated_svd.TruncatedSVD", - "primitive_family": metadata_base.PrimitiveFamily.DATA_PREPROCESSING, - "python_path": "d3m.primitives.data_preprocessing.truncated_svd.SKlearn", - "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.TruncatedSVD.html']}, - "version": "2019.11.13", - "id": "9231fde3-7322-3c41-b4cf-d00a93558c44", - "hyperparams_to_tune": ['n_components'], - 'installation': [ - {'type': metadata_base.PrimitiveInstallationType.PIP, - 'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format( - git_commit=utils.current_git_commit(os.path.dirname(__file__)), - ), - }] - }) - - def __init__(self, *, - hyperparams: Hyperparams, - random_seed: int = 0, - docker_containers: Dict[str, DockerContainer] = None) -> None: - - super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers) - - # False - self._clf = TruncatedSVD( - n_components=self.hyperparams['n_components'], - algorithm=self.hyperparams['algorithm']['choice'], - n_iter=self.hyperparams['algorithm'].get('n_iter', 5), - tol=self.hyperparams['algorithm'].get('tol', 0), - random_state=self.random_seed, - ) - - self._inputs = None - self._outputs = None - self._training_inputs = None - self._training_outputs = None - self._target_names = None - self._training_indices = None - self._target_column_indices = None - self._target_columns_metadata: List[OrderedDict] = None - self._input_column_names = None - self._fitted = False - - - def set_training_data(self, *, inputs: Inputs) -> None: - self._inputs = inputs - self._fitted = False - - def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]: - if self._fitted: - return CallResult(None) - - self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams) - self._input_column_names = self._training_inputs.columns - - if self._training_inputs is None: - return CallResult(None) - - if len(self._training_indices) > 0: - self._clf.fit(self._training_inputs) - self._fitted = True - else: - if self.hyperparams['error_on_no_input']: - raise RuntimeError("No input columns were selected") - self.logger.warn("No input columns were selected") - return CallResult(None) - - def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]: - if not self._fitted: - raise PrimitiveNotFittedError("Primitive not fitted.") - sk_inputs = 
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
-        if not self._fitted:
-            raise PrimitiveNotFittedError("Primitive not fitted.")
-        sk_inputs = inputs
-        if self.hyperparams['use_semantic_types']:
-            sk_inputs = inputs.iloc[:, self._training_indices]
-        output_columns = []
-        if len(self._training_indices) > 0:
-            sk_output = self._clf.transform(sk_inputs)
-            if sparse.issparse(sk_output):
-                sk_output = sk_output.toarray()
-            outputs = self._wrap_predictions(inputs, sk_output)
-            if len(outputs.columns) == len(self._input_column_names):
-                outputs.columns = self._input_column_names
-            output_columns = [outputs]
-        else:
-            if self.hyperparams['error_on_no_input']:
-                raise RuntimeError("No input columns were selected")
-            self.logger.warning("No input columns were selected")
-        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
-                                             add_index_columns=self.hyperparams['add_index_columns'],
-                                             inputs=inputs, column_indices=self._training_indices,
-                                             columns_list=output_columns)
-        return CallResult(outputs)
-
-    def get_params(self) -> Params:
-        if not self._fitted:
-            return Params(
-                components_=None,
-                explained_variance_ratio_=None,
-                explained_variance_=None,
-                singular_values_=None,
-                input_column_names=self._input_column_names,
-                training_indices_=self._training_indices,
-                target_names_=self._target_names,
-                target_column_indices_=self._target_column_indices,
-                target_columns_metadata_=self._target_columns_metadata
-            )
-
-        return Params(
-            components_=getattr(self._clf, 'components_', None),
-            explained_variance_ratio_=getattr(self._clf, 'explained_variance_ratio_', None),
-            explained_variance_=getattr(self._clf, 'explained_variance_', None),
-            singular_values_=getattr(self._clf, 'singular_values_', None),
-            input_column_names=self._input_column_names,
-            training_indices_=self._training_indices,
-            target_names_=self._target_names,
-            target_column_indices_=self._target_column_indices,
-            target_columns_metadata_=self._target_columns_metadata
-        )
-
-    def set_params(self, *, params: Params) -> None:
-        self._clf.components_ = params['components_']
-        self._clf.explained_variance_ratio_ = params['explained_variance_ratio_']
-        self._clf.explained_variance_ = params['explained_variance_']
-        self._clf.singular_values_ = params['singular_values_']
-        self._input_column_names = params['input_column_names']
-        self._training_indices = params['training_indices_']
-        self._target_names = params['target_names_']
-        self._target_column_indices = params['target_column_indices_']
-        self._target_columns_metadata = params['target_columns_metadata_']
-
-        if params['components_'] is not None:
-            self._fitted = True
-        if params['explained_variance_ratio_'] is not None:
-            self._fitted = True
-        if params['explained_variance_'] is not None:
-            self._fitted = True
-        if params['singular_values_'] is not None:
-            self._fitted = True
-
-    @classmethod
-    def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams):
-        if not hyperparams['use_semantic_types']:
-            return inputs, list(range(len(inputs.columns)))
-
-        inputs_metadata = inputs.metadata
-
-        def can_produce_column(column_index: int) -> bool:
-            return cls._can_produce_column(inputs_metadata, column_index, hyperparams)
-
-        columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata,
                                                                                    use_columns=hyperparams['use_columns'],
                                                                                    exclude_columns=hyperparams['exclude_columns'],
                                                                                    can_use_column=can_produce_column)
-        return inputs.iloc[:, columns_to_produce], columns_to_produce
-    @classmethod
-    def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool:
-        column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))
-
-        accepted_structural_types = (int, float, numpy.integer, numpy.float64)
-        accepted_semantic_types = set()
-        accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute")
-        if not issubclass(column_metadata['structural_type'], accepted_structural_types):
-            return False
-
-        semantic_types = set(column_metadata.get('semantic_types', []))
-
-        if len(semantic_types) == 0:
-            cls.logger.warning("No semantic types found in column metadata")
-            return False
-
-        # Making sure all accepted_semantic_types are available in semantic_types
-        if len(accepted_semantic_types - semantic_types) == 0:
-            return True
-
-        return False
-
-    @classmethod
-    def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]:
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict(outputs_metadata.query_column(column_index))
-
-            # Update semantic types and prepare it for predicted targets.
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set()
-            add_semantic_types = set()
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            column_metadata.pop("structural_type", None)
-            outputs_metadata = outputs_metadata.update_column(column_index, column_metadata)
-
-        return outputs_metadata
-
-    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
-        outputs = d3m_dataframe(predictions, generate_metadata=True)
-        target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams)
-        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
-        return outputs
-
-    @classmethod
-    def _add_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams):
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_name = "output_{}".format(column_index)
-            column_metadata = OrderedDict()
-            semantic_types = set()
-            semantic_types.add(hyperparams["return_semantic_type"])
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            column_metadata["name"] = str(column_name)
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-
-SKTruncatedSVD.__doc__ = TruncatedSVD.__doc__
\ No newline at end of file
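For orientation, a minimal self-contained sketch (toy data; only the
scikit-learn API referenced by the deleted module is assumed) of the
fit/transform sequence that SKTruncatedSVD drives on sklearn's TruncatedSVD:

    import numpy
    from sklearn.decomposition import TruncatedSVD

    X = numpy.random.RandomState(0).rand(10, 5)  # toy attribute matrix
    svd = TruncatedSVD(n_components=2, algorithm='randomized', n_iter=5, tol=0.0, random_state=0)
    svd.fit(X)                  # what SKTruncatedSVD.fit() delegates to
    reduced = svd.transform(X)  # what produce() wraps back into a d3m DataFrame
    assert reduced.shape == (10, 2)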
diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/SKVarianceThreshold.py b/common-primitives/sklearn-wrap/sklearn_wrap/SKVarianceThreshold.py
deleted file mode 100644
index d6f30ab..0000000
--- a/common-primitives/sklearn-wrap/sklearn_wrap/SKVarianceThreshold.py
+++ /dev/null
@@ -1,414 +0,0 @@
-from typing import Any, Callable, List, Dict, Union, Optional, Sequence, Tuple
-from numpy import ndarray
-from collections import OrderedDict
-from scipy import sparse
-import os
-import sklearn
-import numpy
-import typing
-
-# Custom import commands if any
-from sklearn.feature_selection.variance_threshold import VarianceThreshold
-
-
-from d3m.container.numpy import ndarray as d3m_ndarray
-from d3m.container import DataFrame as d3m_dataframe
-from d3m.metadata import hyperparams, params, base as metadata_base
-from d3m import utils
-from d3m.base import utils as base_utils
-from d3m.exceptions import PrimitiveNotFittedError
-from d3m.primitive_interfaces.base import CallResult, DockerContainer
-
-from d3m.primitive_interfaces.supervised_learning import SupervisedLearnerPrimitiveBase
-from d3m.primitive_interfaces.base import ProbabilisticCompositionalityMixin, ContinueFitMixin
-from d3m import exceptions
-import pandas
-
-
-Inputs = d3m_dataframe
-Outputs = d3m_dataframe
-
-
-class Params(params.Params):
-    variances_: Optional[ndarray]
-    input_column_names: Optional[Any]
-    target_names_: Optional[Sequence[Any]]
-    training_indices_: Optional[Sequence[int]]
-    target_column_indices_: Optional[Sequence[int]]
-    target_columns_metadata_: Optional[List[OrderedDict]]
-
-
-class Hyperparams(hyperparams.Hyperparams):
-    threshold = hyperparams.Bounded[float](
-        default=0.0,
-        lower=0,
-        upper=None,
-        description='Features with a training-set variance lower than this threshold will be removed. The default is to keep all features with non-zero variance, i.e. remove the features that have the same value in all samples.',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
-    )
-    use_inputs_columns = hyperparams.Set(
-        elements=hyperparams.Hyperparameter[int](-1),
-        default=(),
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.",
-    )
-    use_outputs_columns = hyperparams.Set(
-        elements=hyperparams.Hyperparameter[int](-1),
-        default=(),
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.",
-    )
-    exclude_inputs_columns = hyperparams.Set(
-        elements=hyperparams.Hyperparameter[int](-1),
-        default=(),
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="A set of column indices to not use as training inputs. Applicable only if \"use_inputs_columns\" is not provided.",
-    )
-    exclude_outputs_columns = hyperparams.Set(
-        elements=hyperparams.Hyperparameter[int](-1),
-        default=(),
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="A set of column indices to not use as training target. Applicable only if \"use_outputs_columns\" is not provided.",
-    )
-    return_result = hyperparams.Enumeration(
-        values=['update_semantic_types', 'replace', 'new'],
-        default='new',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
-    )
-    use_semantic_types = hyperparams.UniformBool(
-        default=False,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Controls whether semantic_types metadata is used to filter columns in the input dataframe. Setting this to false makes the code ignore return_result and produce only the output dataframe.",
-    )
-    add_index_columns = hyperparams.UniformBool(
-        default=False,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
-    )
-    error_on_no_input = hyperparams.UniformBool(
-        default=True,
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
-        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
-    )
-
-    return_semantic_type = hyperparams.Enumeration[str](
-        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
-        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
-        description='Decides what semantic type to attach to generated output',
-        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
-    )
-
-class SKVarianceThreshold(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
-    """
-    Primitive wrapping for sklearn VarianceThreshold
-    `sklearn documentation <https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html>`_
-
-    """
-
-    __author__ = "JPL MARVIN"
-    metadata = metadata_base.PrimitiveMetadata({
-        "algorithm_types": [metadata_base.PrimitiveAlgorithmType.FEATURE_SCALING, ],
-        "name": "sklearn.feature_selection.variance_threshold.VarianceThreshold",
-        "primitive_family": metadata_base.PrimitiveFamily.FEATURE_SELECTION,
-        "python_path": "d3m.primitives.feature_selection.variance_threshold.SKlearn",
-        "source": {'name': 'JPL', 'contact': 'mailto:shah@jpl.nasa.gov', 'uris': ['https://gitlab.com/datadrivendiscovery/sklearn-wrap/issues', 'https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html']},
-        "version": "2019.11.13",
-        "id": "980c43c7-ab2a-3dc9-943b-db08a7c25cb6",
-        "hyperparams_to_tune": ['threshold'],
-        'installation': [
-            {'type': metadata_base.PrimitiveInstallationType.PIP,
-             'package_uri': 'git+https://gitlab.com/datadrivendiscovery/sklearn-wrap.git@{git_commit}#egg=sklearn_wrap'.format(
-                 git_commit=utils.current_git_commit(os.path.dirname(__file__)),
-             ),
-             }]
-    })
-
-    def __init__(self, *,
-                 hyperparams: Hyperparams,
-                 random_seed: int = 0,
-                 docker_containers: Dict[str, DockerContainer] = None) -> None:
-
-        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
-
-        self._clf = VarianceThreshold(
-            threshold=self.hyperparams['threshold'],
-        )
-
-        self._inputs = None
-        self._outputs = None
-        self._training_inputs = None
-        self._training_outputs = None
-        self._target_names = None
-        self._training_indices = None
-        self._target_column_indices = None
-        self._target_columns_metadata: List[OrderedDict] = None
-        self._input_column_names = None
-        self._fitted = False
-        self._new_training_data = False
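# Editorial aside (plain scikit-learn, toy data; not part of the deleted file):
# the 'threshold' hyperparameter above follows sklearn's semantics. For a
# boolean feature with P(x=1)=p the variance is p*(1-p), and the default
# threshold of 0.0 removes only constant columns.
import numpy
from sklearn.feature_selection import VarianceThreshold

X = numpy.array([[0, 1], [0, 1], [0, 0], [0, 1]])  # first column is constant
selector = VarianceThreshold(threshold=0.0)
assert selector.fit_transform(X).shape == (4, 1)   # zero-variance column dropped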
-    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
-        self._inputs = inputs
-        self._outputs = outputs
-        self._fitted = False
-        self._new_training_data = True
-
-    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
-        if self._fitted:
-            return CallResult(None)
-
-        self._training_inputs, self._training_indices = self._get_columns_to_fit(self._inputs, self.hyperparams)
-        self._training_outputs, self._target_names, self._target_column_indices = self._get_targets(self._outputs, self.hyperparams)
-
-        if self._training_inputs is None or self._training_outputs is None:
-            raise ValueError("Missing training data.")
-
-        self._input_column_names = self._training_inputs.columns
-
-        if len(self._training_indices) > 0 and len(self._target_column_indices) > 0:
-            sk_training_output = self._training_outputs.values
-
-            shape = sk_training_output.shape
-            if len(shape) == 2 and shape[1] == 1:
-                sk_training_output = numpy.ravel(sk_training_output)
-
-            self._clf.fit(self._training_inputs, sk_training_output)
-            self._fitted = True
-        else:
-            if self.hyperparams['error_on_no_input']:
-                raise RuntimeError("No input columns were selected")
-            self.logger.warning("No input columns were selected")
-
-        return CallResult(None)
-
-    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
-        sk_inputs, columns_to_use = self._get_columns_to_fit(inputs, self.hyperparams)
-        output = []
-        if len(sk_inputs.columns):
-            try:
-                sk_output = self._clf.transform(sk_inputs)
-            except sklearn.exceptions.NotFittedError as error:
-                raise PrimitiveNotFittedError("Primitive not fitted.") from error
-            if sparse.issparse(sk_output):
-                sk_output = sk_output.toarray()
-            target_columns_metadata = self._copy_columns_metadata(inputs.iloc[:, self._training_indices].metadata,
                                                                   self.produce_support().value)
-            output = self._wrap_predictions(inputs, sk_output, target_columns_metadata)
-            output.columns = [inputs.columns[idx] for idx in range(len(inputs.columns)) if idx in self.produce_support().value]
-            output = [output]
-        else:
-            if self.hyperparams['error_on_no_input']:
-                raise RuntimeError("No input columns were selected")
-            self.logger.warning("No input columns were selected")
-
-        if self.hyperparams['return_result'] == 'update_semantic_types':
-            temp_inputs = inputs.copy()
-            columns_not_selected = sorted(set(range(len(temp_inputs.columns))) - set(self.produce_support().value))
-
-            for idx in columns_not_selected:
-                temp_inputs.metadata = temp_inputs.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, idx),
                                                                                  'https://metadata.datadrivendiscovery.org/types/Attribute')
-
-            temp_inputs = temp_inputs.select_columns(self._training_indices)
-            outputs = base_utils.combine_columns(return_result='replace',
                                                  add_index_columns=self.hyperparams['add_index_columns'],
                                                  inputs=inputs, column_indices=self._training_indices,
                                                  columns_list=[temp_inputs])
-            return CallResult(outputs)
-
-        outputs = base_utils.combine_columns(return_result=self.hyperparams['return_result'],
                                              add_index_columns=self.hyperparams['add_index_columns'],
                                              inputs=inputs, column_indices=self._training_indices,
                                              columns_list=output)
-
-        return CallResult(outputs)
-
-    def produce_support(self, *, timeout: float = None, iterations: int = None) -> CallResult[Any]:
-        all_indices = self._training_indices
-        selected_indices = self._clf.get_support(indices=True).tolist()
-        indices = [all_indices[index] for index in selected_indices]
-        return CallResult(indices)
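# Editorial aside (illustrative values only, not part of the deleted file):
# produce_support() above maps sklearn's selector-local indices back to the
# indices of the original dataframe.
training_indices = [2, 3, 5, 7]  # columns the primitive was fitted on
selected = [0, 2, 3]             # hypothetical clf.get_support(indices=True) result
assert [training_indices[i] for i in selected] == [2, 5, 7]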
-    def get_params(self) -> Params:
-        if not self._fitted:
-            return Params(
-                variances_=None,
-                input_column_names=self._input_column_names,
-                training_indices_=self._training_indices,
-                target_names_=self._target_names,
-                target_column_indices_=self._target_column_indices,
-                target_columns_metadata_=self._target_columns_metadata
-            )
-
-        return Params(
-            variances_=getattr(self._clf, 'variances_', None),
-            input_column_names=self._input_column_names,
-            training_indices_=self._training_indices,
-            target_names_=self._target_names,
-            target_column_indices_=self._target_column_indices,
-            target_columns_metadata_=self._target_columns_metadata
-        )
-
-    def set_params(self, *, params: Params) -> None:
-        self._clf.variances_ = params['variances_']
-        self._input_column_names = params['input_column_names']
-        self._training_indices = params['training_indices_']
-        self._target_names = params['target_names_']
-        self._target_column_indices = params['target_column_indices_']
-        self._target_columns_metadata = params['target_columns_metadata_']
-
-        if params['variances_'] is not None:
-            self._fitted = True
-
-    @classmethod
-    def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: Hyperparams):
-        if not hyperparams['use_semantic_types']:
-            return inputs, list(range(len(inputs.columns)))
-
-        inputs_metadata = inputs.metadata
-
-        def can_produce_column(column_index: int) -> bool:
-            return cls._can_produce_column(inputs_metadata, column_index, hyperparams)
-
-        columns_to_produce, columns_not_to_produce = base_utils.get_columns_to_use(inputs_metadata,
                                                                                    use_columns=hyperparams['use_inputs_columns'],
                                                                                    exclude_columns=hyperparams['exclude_inputs_columns'],
                                                                                    can_use_column=can_produce_column)
-        return inputs.iloc[:, columns_to_produce], columns_to_produce
-
-    @classmethod
-    def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: Hyperparams) -> bool:
-        column_metadata = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index))
-
-        accepted_structural_types = (int, float, numpy.integer, numpy.float64)
-        accepted_semantic_types = set()
-        accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/Attribute")
-        if not issubclass(column_metadata['structural_type'], accepted_structural_types):
-            return False
-
-        semantic_types = set(column_metadata.get('semantic_types', []))
-
-        if len(semantic_types) == 0:
-            cls.logger.warning("No semantic types found in column metadata")
-            return False
-        # Making sure all accepted_semantic_types are available in semantic_types
-        if len(accepted_semantic_types - semantic_types) == 0:
-            return True
-
-        return False
-
-    @classmethod
-    def _get_targets(cls, data: d3m_dataframe, hyperparams: Hyperparams):
-        if not hyperparams['use_semantic_types']:
-            return data, list(data.columns), list(range(len(data.columns)))
-
-        metadata = data.metadata
-
-        def can_produce_column(column_index: int) -> bool:
-            accepted_semantic_types = set()
-            accepted_semantic_types.add("https://metadata.datadrivendiscovery.org/types/TrueTarget")
-            column_metadata = metadata.query((metadata_base.ALL_ELEMENTS, column_index))
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            if len(semantic_types) == 0:
-                cls.logger.warning("No semantic types found in column metadata")
-                return False
-            # Making sure all accepted_semantic_types are available in semantic_types
-            if len(accepted_semantic_types - semantic_types) == 0:
-                return True
-            return False
-
-        target_column_indices, target_columns_not_to_produce = base_utils.get_columns_to_use(
-            metadata,
-            use_columns=hyperparams['use_outputs_columns'],
-            exclude_columns=hyperparams['exclude_outputs_columns'],
-            can_use_column=can_produce_column,
-        )
-        targets = []
-        if target_column_indices:
-            targets = data.select_columns(target_column_indices)
-        target_column_names = []
-        for idx in target_column_indices:
-            target_column_names.append(data.columns[idx])
-        return targets, target_column_names, target_column_indices
-
-    @classmethod
-    def _get_target_columns_metadata(cls, outputs_metadata: metadata_base.DataMetadata, hyperparams) -> List[OrderedDict]:
-        outputs_length = outputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']
-
-        target_columns_metadata: List[OrderedDict] = []
-        for column_index in range(outputs_length):
-            column_metadata = OrderedDict(outputs_metadata.query_column(column_index))
-
-            # Update semantic types and prepare it for predicted targets.
-            semantic_types = set(column_metadata.get('semantic_types', []))
-            semantic_types_to_remove = set()
-            add_semantic_types = set()
-            add_semantic_types.add(hyperparams["return_semantic_type"])
-            semantic_types = semantic_types - semantic_types_to_remove
-            semantic_types = semantic_types.union(add_semantic_types)
-            column_metadata['semantic_types'] = list(semantic_types)
-
-            target_columns_metadata.append(column_metadata)
-
-        return target_columns_metadata
-
-    @classmethod
-    def _update_predictions_metadata(cls, inputs_metadata: metadata_base.DataMetadata, outputs: Optional[Outputs],
-                                     target_columns_metadata: List[OrderedDict]) -> metadata_base.DataMetadata:
-        outputs_metadata = metadata_base.DataMetadata().generate(value=outputs)
-
-        for column_index, column_metadata in enumerate(target_columns_metadata):
-            if len(target_columns_metadata) == 1:
-                name = column_metadata.get("name")
-                for idx in range(len(outputs.columns)):
-                    outputs_metadata = outputs_metadata.update_column(idx, column_metadata)
-                    if len(outputs.columns) > 1:
-                        # Updating column names.
- outputs_metadata = outputs_metadata.update((metadata_base.ALL_ELEMENTS, idx), {'name': "{}_{}".format(name, idx)}) - else: - outputs_metadata = outputs_metadata.update_column(column_index, column_metadata) - - return outputs_metadata - - - def _wrap_predictions(self, inputs: Inputs, predictions: ndarray, target_columns_metadata) -> Outputs: - outputs = d3m_dataframe(predictions, generate_metadata=False) - outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata) - return outputs - - - - @classmethod - def _copy_columns_metadata(cls, inputs_metadata: metadata_base.DataMetadata, column_indices) -> List[OrderedDict]: - outputs_length = inputs_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - - target_columns_metadata: List[OrderedDict] = [] - for column_index in column_indices: - column_name = inputs_metadata.query((metadata_base.ALL_ELEMENTS, column_index)).get("name") - column_metadata = OrderedDict(inputs_metadata.query_column(column_index)) - semantic_types = set(column_metadata.get('semantic_types', [])) - semantic_types_to_remove = set([]) - add_semantic_types = [] - semantic_types = semantic_types - semantic_types_to_remove - semantic_types = semantic_types.union(add_semantic_types) - column_metadata['semantic_types'] = list(semantic_types) - - column_metadata["name"] = str(column_name) - target_columns_metadata.append(column_metadata) - - return target_columns_metadata - - -SKVarianceThreshold.__doc__ = VarianceThreshold.__doc__ \ No newline at end of file diff --git a/common-primitives/sklearn-wrap/sklearn_wrap/__init__.py b/common-primitives/sklearn-wrap/sklearn_wrap/__init__.py deleted file mode 100644 index def4f5b..0000000 --- a/common-primitives/sklearn-wrap/sklearn_wrap/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -__author__ = 'JPL DARPA D3M TEAM' -__version__ = '2019.11.13' diff --git a/common-primitives/tests/test_audio_reader.py b/common-primitives/tests/test_audio_reader.py deleted file mode 100644 index f02bd2b..0000000 --- a/common-primitives/tests/test_audio_reader.py +++ /dev/null @@ -1,105 +0,0 @@ -import unittest -import os - -from d3m import container - -from common_primitives import audio_reader, dataset_to_dataframe, denormalize - - -class AudioReaderPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'audio_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults().replace({'dataframe_resource': '0'})) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - audio_hyperparams_class = audio_reader.AudioReaderPrimitive.metadata.get_hyperparams() - audio_primitive = audio_reader.AudioReaderPrimitive(hyperparams=audio_hyperparams_class.defaults().replace({'return_result': 'replace'})) - audios = audio_primitive.produce(inputs=dataframe).value - - self.assertEqual(audios.shape, (1, 1)) - self.assertEqual(audios.iloc[0, 0].shape, (4410, 1)) - - self._test_metadata(audios.metadata, True) - - self.assertEqual(audios.metadata.query((0, 0))['dimension']['length'], 4410) - self.assertEqual(audios.metadata.query((0, 0))['dimension']['sampling_rate'], 44100) - - def _test_metadata(self, 
metadata, is_table): - semantic_types = ('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'http://schema.org/AudioObject') - - if is_table: - semantic_types += ('https://metadata.datadrivendiscovery.org/types/Table',) - - self.assertEqual(metadata.query_column(0)['name'], 'filename') - self.assertEqual(metadata.query_column(0)['structural_type'], container.ndarray) - self.assertEqual(metadata.query_column(0)['semantic_types'], semantic_types) - - def test_boundaries_reassign(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'audio_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - denormalize_hyperparams_class = denormalize.DenormalizePrimitive.metadata.get_hyperparams() - denormalize_primitive = denormalize.DenormalizePrimitive(hyperparams=denormalize_hyperparams_class.defaults()) - dataset = denormalize_primitive.produce(inputs=dataset).value - - dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults()) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - audio_hyperparams_class = audio_reader.AudioReaderPrimitive.metadata.get_hyperparams() - audio_primitive = audio_reader.AudioReaderPrimitive(hyperparams=audio_hyperparams_class.defaults().replace({'return_result': 'append'})) - audios = audio_primitive.produce(inputs=dataframe).value - - self.assertEqual(audios.shape, (1, 6)) - self.assertEqual(audios.iloc[0, 5].shape, (4410, 1)) - - self._test_boundaries_reassign_metadata(audios.metadata, True) - - self.assertEqual(audios.metadata.query((0, 5))['dimension']['length'], 4410) - self.assertEqual(audios.metadata.query((0, 5))['dimension']['sampling_rate'], 44100) - - def _test_boundaries_reassign_metadata(self, metadata, is_table): - semantic_types = ('http://schema.org/AudioObject', 'https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/UniqueKey') - - if is_table: - semantic_types += ('https://metadata.datadrivendiscovery.org/types/Table',) - - self.assertEqual(metadata.query_column(5)['name'], 'filename') - self.assertEqual(metadata.query_column(5)['structural_type'], container.ndarray) - self.assertEqual(metadata.query_column(5)['semantic_types'], semantic_types) - - self.assertEqual(metadata.query_column(2), { - 'structural_type': str, - 'name': 'start', - 'semantic_types': ( - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Boundary', - 'https://metadata.datadrivendiscovery.org/types/IntervalStart', - ), - 'boundary_for': { - 'resource_id': 'learningData', - 'column_index': 5, - }, - }) - self.assertEqual(metadata.query_column(3), { - 'structural_type': str, - 'name': 'end', - 'semantic_types': ( - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Boundary', - 'https://metadata.datadrivendiscovery.org/types/IntervalEnd', - ), - 'boundary_for': { - 'resource_id': 'learningData', - 'column_index': 5, - }, - }) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_cast_to_type.py b/common-primitives/tests/test_cast_to_type.py deleted file mode 100644 index 304ef18..0000000 --- a/common-primitives/tests/test_cast_to_type.py +++ /dev/null @@ -1,131 +0,0 @@ -import os -import logging -import unittest - 
-import numpy - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import cast_to_type, column_parser, dataset_to_dataframe, extract_columns_semantic_types - - -class CastToTypePrimitiveTestCase(unittest.TestCase): - def test_basic(self): - inputs = container.DataFrame({'a': [1, 2, 3], 'b': ['a', 'b', 'c']}, generate_metadata=True) - - self.assertEqual(inputs.dtypes['a'], numpy.int64) - self.assertEqual(inputs.dtypes['b'], object) - - hyperparams_class = cast_to_type.CastToTypePrimitive.metadata.get_hyperparams() - - primitive = cast_to_type.CastToTypePrimitive(hyperparams=hyperparams_class.defaults().replace({'type_to_cast': 'str'})) - - call_metadata = primitive.produce(inputs=inputs) - - self.assertIsInstance(call_metadata.value, container.DataFrame) - - self.assertEqual(len(call_metadata.value.dtypes), 2) - self.assertEqual(call_metadata.value.dtypes['a'], object) - self.assertEqual(call_metadata.value.dtypes['b'], object) - - self.assertEqual(call_metadata.value.metadata.query((metadata_base.ALL_ELEMENTS, 0))['structural_type'], str) - self.assertEqual(call_metadata.value.metadata.query((metadata_base.ALL_ELEMENTS, 1))['structural_type'], str) - self.assertEqual(call_metadata.value.metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'], 2) - - primitive = cast_to_type.CastToTypePrimitive(hyperparams=hyperparams_class.defaults().replace({'type_to_cast': 'float'})) - - with self.assertLogs(level=logging.WARNING) as cm: - call_metadata = primitive.produce(inputs=inputs) - - self.assertEqual(len(call_metadata.value.dtypes), 1) - self.assertEqual(call_metadata.value.dtypes['a'], float) - - self.assertEqual(call_metadata.value.metadata.query((metadata_base.ALL_ELEMENTS, 0))['structural_type'], float) - self.assertEqual(call_metadata.value.metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'], 1) - - self.assertEqual(len(cm.records), 1) - self.assertEqual(cm.records[0].msg, "Not all columns can be cast to type '%(type)s'. 
Skipping columns: %(columns)s") - - primitive = cast_to_type.CastToTypePrimitive(hyperparams=hyperparams_class.defaults().replace({'exclude_columns': (0,), 'type_to_cast': 'float'})) - - with self.assertRaisesRegex(ValueError, 'No columns to be cast to type'): - primitive.produce(inputs=inputs) - - def test_objects(self): - hyperparams_class = cast_to_type.CastToTypePrimitive.metadata.get_hyperparams() - - inputs = container.DataFrame({'a': [1, 2, 3], 'b': [{'a': 1}, {'b': 1}, {'c': 1}]}, { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': container.DataFrame, - 'dimension': { - 'length': 3, - }, - }, generate_metadata=False) - inputs.metadata = inputs.metadata.update((metadata_base.ALL_ELEMENTS,), { - 'dimension': { - 'length': 2, - }, - }) - inputs.metadata = inputs.metadata.update((metadata_base.ALL_ELEMENTS, 0), { - 'structural_type': int, - }) - inputs.metadata = inputs.metadata.update((metadata_base.ALL_ELEMENTS, 1), { - 'structural_type': dict, - }) - - self.assertEqual(inputs.dtypes['a'], numpy.int64) - self.assertEqual(inputs.dtypes['b'], object) - - primitive = cast_to_type.CastToTypePrimitive(hyperparams=hyperparams_class.defaults().replace({'type_to_cast': 'str'})) - - call_metadata = primitive.produce(inputs=inputs) - - self.assertEqual(len(call_metadata.value.dtypes), 2) - self.assertEqual(call_metadata.value.dtypes['a'], object) - self.assertEqual(call_metadata.value.dtypes['b'], object) - - self.assertEqual(call_metadata.value.metadata.query((metadata_base.ALL_ELEMENTS, 0))['structural_type'], str) - self.assertEqual(call_metadata.value.metadata.query((metadata_base.ALL_ELEMENTS, 1))['structural_type'], str) - self.assertEqual(call_metadata.value.metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'], 2) - - primitive = cast_to_type.CastToTypePrimitive(hyperparams=hyperparams_class.defaults().replace({'type_to_cast': 'float'})) - - with self.assertLogs(level=logging.WARNING) as cm: - call_metadata = primitive.produce(inputs=inputs) - - self.assertEqual(len(call_metadata.value.dtypes), 1) - self.assertEqual(call_metadata.value.dtypes['a'], float) - - self.assertEqual(call_metadata.value.metadata.query((metadata_base.ALL_ELEMENTS, 0))['structural_type'], float) - self.assertEqual(call_metadata.value.metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'], 1) - - self.assertEqual(len(cm.records), 1) - self.assertEqual(cm.records[0].msg, "Not all columns can be cast to type '%(type)s'. 
Skipping columns: %(columns)s") - - def test_data(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataset).value - - hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - hyperparams_class = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams() - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults()) - attributes = primitive.produce(inputs=dataframe).value - - hyperparams_class = cast_to_type.CastToTypePrimitive.metadata.get_hyperparams() - primitive = cast_to_type.CastToTypePrimitive(hyperparams=hyperparams_class.defaults().replace({'type_to_cast': 'float'})) - cast_attributes = primitive.produce(inputs=attributes).value - - self.assertEqual(cast_attributes.values.dtype, numpy.float64) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_column_map.py b/common-primitives/tests/test_column_map.py deleted file mode 100644 index 0323239..0000000 --- a/common-primitives/tests/test_column_map.py +++ /dev/null @@ -1,75 +0,0 @@ -import unittest -import os -import pickle -import sys - -from d3m import container, index, utils as d3m_utils - -TEST_PRIMITIVES_DIR = os.path.join(os.path.dirname(__file__), 'data', 'primitives') -sys.path.insert(0, TEST_PRIMITIVES_DIR) - -from test_primitives.null import NullTransformerPrimitive, NullUnsupervisedLearnerPrimitive - -# To hide any logging or stdout output. 
-with d3m_utils.silence(): - index.register_primitive('d3m.primitives.operator.null.TransformerTest', NullTransformerPrimitive) - index.register_primitive('d3m.primitives.operator.null.UnsupervisedLearnerTest', NullUnsupervisedLearnerPrimitive) - -from common_primitives import dataset_to_dataframe, csv_reader, denormalize, column_map, column_parser - -import utils as test_utils - - -class ColumnMapTestCase(unittest.TestCase): - def test_transformer(self): - self.maxDiff = None - - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_2', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams = denormalize.DenormalizePrimitive.metadata.get_hyperparams() - primitive = denormalize.DenormalizePrimitive(hyperparams=hyperparams.defaults()) - dataset = primitive.produce(inputs=dataset).value - - hyperparams = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams.defaults()) - dataframe = primitive.produce(inputs=dataset).value - - hyperparams = csv_reader.CSVReaderPrimitive.metadata.get_hyperparams() - primitive = csv_reader.CSVReaderPrimitive(hyperparams=hyperparams.defaults().replace({'return_result': 'replace'})) - dataframe = primitive.produce(inputs=dataframe).value - - hyperparams = column_map.DataFrameColumnMapPrimitive.metadata.get_hyperparams() - primitive = column_map.DataFrameColumnMapPrimitive( - # We have to make an instance of the primitive ourselves. - hyperparams=hyperparams.defaults().replace({ - # First we use identity primitive which should not really change anything. - 'primitive': NullTransformerPrimitive( - hyperparams=NullTransformerPrimitive.metadata.get_hyperparams().defaults(), - ), - }), - ) - mapped_dataframe = primitive.produce(inputs=dataframe).value - - self.assertEqual(test_utils.convert_through_json(test_utils.effective_metadata(dataframe.metadata)), test_utils.convert_through_json(test_utils.effective_metadata(mapped_dataframe.metadata))) - - self.assertEqual(test_utils.convert_through_json(dataframe), test_utils.convert_through_json(mapped_dataframe)) - - primitive = column_map.DataFrameColumnMapPrimitive( - # We have to make an instance of the primitive ourselves. 
- hyperparams=hyperparams.defaults().replace({ - 'primitive': column_parser.ColumnParserPrimitive( - hyperparams=column_parser.ColumnParserPrimitive.metadata.get_hyperparams().defaults(), - ), - }), - ) - dataframe = primitive.produce(inputs=mapped_dataframe).value - - self.assertEqual(test_utils.convert_through_json(dataframe)[0][1][0], [0, 2.6173]) - - pickle.dumps(primitive) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_column_parser.py b/common-primitives/tests/test_column_parser.py deleted file mode 100644 index 5d4e4b6..0000000 --- a/common-primitives/tests/test_column_parser.py +++ /dev/null @@ -1,474 +0,0 @@ -import math -import os.path -import unittest - -import numpy - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, column_parser, utils as common_utils - -import utils as test_utils - - -class ColumnParserPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataframe) - - dataframe = call_metadata.value - - first_row = list(dataframe.itertuples(index=False, name=None))[0] - - self.assertEqual(first_row, (0, 5.1, 3.5, 1.4, 0.2, 6241605690342144121)) - - self.assertEqual([type(o) for o in first_row], [int, float, float, float, float, int]) - - self._test_basic_metadata(dataframe.metadata) - - def _test_basic_metadata(self, metadata): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 6, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - for i in range(1, 5): - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, i))), { - 'name': ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'][i - 1], - 'structural_type': 'float', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }, i) - - 
self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 5))), { - 'name': 'species', - 'structural_type': 'int', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - def test_new(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'use_columns': [2]})) - - call_metadata = primitive.produce(inputs=dataframe) - - dataframe = call_metadata.value - - first_row = list(dataframe.itertuples(index=False, name=None))[0] - - self.assertEqual(first_row, ('0', 3.5)) - - self.assertEqual([type(o) for o in first_row], [str, float]) - - self._test_new_metadata(dataframe.metadata) - - def _test_new_metadata(self, metadata): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 2, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1))), { - 'name': 'sepalWidth', - 'structural_type': 'float', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - def test_append(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - - primitive = 
column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults().replace({'return_result': 'append', 'replace_index_columns': False, 'parse_semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', 'http://schema.org/Integer']})) - - call_metadata = primitive.produce(inputs=dataframe) - - dataframe = call_metadata.value - - first_row = list(dataframe.itertuples(index=False, name=None))[0] - - self.assertEqual(first_row, ('0', '5.1', '3.5', '1.4', '0.2', 'Iris-setosa', 0, 6241605690342144121)) - - self.assertEqual([type(o) for o in first_row], [str, str, str, str, str, str, int, int]) - - self._test_append_metadata(dataframe.metadata, False) - - def test_append_replace_index_columns(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults().replace({'return_result': 'append', 'parse_semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', 'http://schema.org/Integer']})) - - call_metadata = primitive.produce(inputs=dataframe) - - dataframe = call_metadata.value - - first_row = list(dataframe.itertuples(index=False, name=None))[0] - - self.assertEqual(first_row, (0, '5.1', '3.5', '1.4', '0.2', 'Iris-setosa', 6241605690342144121)) - - self.assertEqual([type(o) for o in first_row], [int, str, str, str, str, str, int]) - - self._test_append_metadata(dataframe.metadata, True) - - def _test_append_metadata(self, metadata, replace_index_columns): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 7 if replace_index_columns else 8, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'int' if replace_index_columns else 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - for i in range(1, 5): - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, i))), { - 'name': ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'][i - 1], - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }, i) - - 
self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 5))), { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - if not replace_index_columns: - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 6))), { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 6 if replace_index_columns else 7))), { - 'name': 'species', - 'structural_type': 'int', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - def test_integer(self): - hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - - dataframe = container.DataFrame({'a': ['1.0', '2.0', '3.0']}, generate_metadata=True) - - dataframe.metadata = dataframe.metadata.update((metadata_base.ALL_ELEMENTS, 0), { - 'name': 'test', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - call_metadata = primitive.produce(inputs=dataframe) - - parsed_dataframe = call_metadata.value - - self.assertEqual(test_utils.convert_through_json(parsed_dataframe.metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'test', - 'structural_type': 'int', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - self.assertEqual(list(parsed_dataframe.iloc[:, 0]), [1, 2, 3]) - - dataframe.iloc[2, 0] = '3.1' - - call_metadata = primitive.produce(inputs=dataframe) - - parsed_dataframe = call_metadata.value - - self.assertEqual(test_utils.convert_through_json(parsed_dataframe.metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'test', - 'structural_type': 'int', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - self.assertEqual(list(parsed_dataframe.iloc[:, 0]), [1, 2, 3]) - - dataframe.iloc[2, 0] = 'aaa' - - with self.assertRaisesRegex(ValueError, 'Not all values in a column can be parsed into integers, but only integers were expected'): - primitive.produce(inputs=dataframe) - - dataframe.metadata = dataframe.metadata.update((metadata_base.ALL_ELEMENTS, 0), { - 'name': 'test', - 'structural_type': str, - 'semantic_types': [ - 'http://schema.org/Integer', - ], - }) - - call_metadata = primitive.produce(inputs=dataframe) - - parsed_dataframe = call_metadata.value - - self.assertEqual(test_utils.convert_through_json(parsed_dataframe.metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'test', - 'structural_type': 'float', - 'semantic_types': [ - 'http://schema.org/Integer', - ], - }) - - self.assertEqual(list(parsed_dataframe.iloc[0:2, 0]), [1.0, 2.0]) - self.assertTrue(math.isnan(parsed_dataframe.iloc[2, 0])) - - def test_float_vector(self): - dataset_doc_path = 
os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'object_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults().replace({'dataframe_resource': 'learningData'})) - dataframe = primitive.produce(inputs=dataset).value - - hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - self.assertIsInstance(dataframe.iloc[0, 3], container.ndarray) - self.assertEqual(dataframe.iloc[0, 3].shape, (8,)) - - self.assertEqual(utils.to_json_structure(dataframe.metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 4, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json'}, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 4, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'structural_type': 'int', - 'name': 'd3mIndex', - 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'image', - 'structural_type': 'str', - 'semantic_types': ['http://schema.org/Text', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'foreign_key': { - 'type': 'COLUMN', - 'resource_id': '0', - 'column_index': 0, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'color_not_class', - 'structural_type': 'int', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 3], - 'metadata': { - 'structural_type': 'd3m.container.numpy.ndarray', - 'dimension': { - 'length': 8, - }, - 'name': 'bounding_polygon_area', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/FloatVector', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Boundary', - 'https://metadata.datadrivendiscovery.org/types/BoundingPolygon', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'boundary_for': { - 'resource_id': 'learningData', - 'column_name': 'image', - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 3, '__ALL_ELEMENTS__'], - 'metadata': {'structural_type': 'numpy.float64'}, - }]) - - def test_ugly_time_values(self): - for value in [ - 'Original chained constant price data are rescaled.', - '1986/87', - ]: - self.assertTrue(numpy.isnan(common_utils.parse_datetime_to_float(value)), value) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_compute_metafeatures.py 
b/common-primitives/tests/test_compute_metafeatures.py deleted file mode 100644 index 07a1e4c..0000000 --- a/common-primitives/tests/test_compute_metafeatures.py +++ /dev/null @@ -1,1106 +0,0 @@ -import math -import os -import os.path -import unittest - -import numpy - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import column_parser, compute_metafeatures, dataset_to_dataframe, denormalize - -import utils as test_utils - - -def round_to_significant_digits(x, n): - if x == 0: - return x - elif not numpy.isfinite(x): - return x - else: - return round(x, -int(math.floor(math.log10(abs(x)))) + (n - 1)) - - -def round_numbers(obj): - if isinstance(obj, (int, str)): - return obj - elif isinstance(obj, float): - return round_to_significant_digits(obj, 12) - elif isinstance(obj, list): - return [round_numbers(el) for el in obj] - elif isinstance(obj, tuple): - return tuple(round_numbers(el) for el in obj) - elif isinstance(obj, dict): - return {k: round_numbers(v) for k, v in obj.items()} - else: - return obj - - -class ComputeMetafeaturesPrimitiveTestCase(unittest.TestCase): - def _get_iris(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults()) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - column_parser_hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - column_parser_primitive = column_parser.ColumnParserPrimitive(hyperparams=column_parser_hyperparams_class.defaults()) - dataframe = column_parser_primitive.produce(inputs=dataframe).value - - return dataframe - - def _get_database(self, parse_categorical_columns): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. 
- dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - denormalize_hyperparams_class = denormalize.DenormalizePrimitive.metadata.get_hyperparams() - denormalize_primitive = denormalize.DenormalizePrimitive(hyperparams=denormalize_hyperparams_class.defaults()) - dataset = denormalize_primitive.produce(inputs=dataset).value - - dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults()) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - if parse_categorical_columns: - parse_semantic_types = ( - 'http://schema.org/Boolean', 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'http://schema.org/Integer', 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/FloatVector', 'http://schema.org/DateTime', - ) - else: - parse_semantic_types = ( - 'http://schema.org/Boolean', - 'http://schema.org/Integer', 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/FloatVector', 'http://schema.org/DateTime', - ) - - column_parser_hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - column_parser_primitive = column_parser.ColumnParserPrimitive(hyperparams=column_parser_hyperparams_class.defaults().replace({'parse_semantic_types': parse_semantic_types})) - dataframe = column_parser_primitive.produce(inputs=dataframe).value - - return dataframe - - def test_iris(self): - self.maxDiff = None - - dataframe = self._get_iris() - - hyperparams_class = compute_metafeatures.ComputeMetafeaturesPrimitive.metadata.get_hyperparams() - primitive = compute_metafeatures.ComputeMetafeaturesPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - self.assertEqual(round_numbers(test_utils.convert_through_json(dataframe.metadata.query(())['data_metafeatures'])), round_numbers({ - 'attribute_counts_by_semantic_type': { - 'http://schema.org/Float': 4, - 'https://metadata.datadrivendiscovery.org/types/Attribute': 4, - }, - 'attribute_counts_by_structural_type': { - 'float': 4, - }, - 'attribute_ratios_by_semantic_type': { - 'http://schema.org/Float': 1.0, - 'https://metadata.datadrivendiscovery.org/types/Attribute': 1.0, - }, - 'attribute_ratios_by_structural_type': { - 'float': 1.0, - }, - 'dimensionality': 0.02666666666666667, - 'entropy_of_attributes': { - 'count': 4, - 'kurtosis': -1.4343159590314425, - 'max': 1.525353510619575, - 'mean': 1.4166844257365265, - 'median': 1.4323995290219738, - 'min': 1.2765851342825842, - 'quartile_1': 1.3565647450899858, - 'quartile_3': 1.4925192096685145, - 'skewness': -0.6047691718752254, - 'std': 0.11070539686522164, - }, - 'entropy_of_numeric_attributes': { - 'count': 4, - 'kurtosis': -1.4343159590314425, - 'max': 1.525353510619575, - 'mean': 1.4166844257365265, - 'median': 1.4323995290219738, - 'min': 1.2765851342825842, - 'quartile_1': 1.3565647450899858, - 'quartile_3': 1.4925192096685145, - 'skewness': 
-0.6047691718752254, - 'std': 0.11070539686522164, - }, - 'kurtosis_of_attributes': { - 'count': 4, - 'kurtosis': -1.1515850633224236, - 'max': 0.2907810623654279, - 'mean': -0.7507394876837397, - 'median': -0.9459091062274914, - 'min': -1.4019208006454036, - 'quartile_1': -1.3552958285158583, - 'quartile_3': -0.3413527653953726, - 'skewness': 0.8725328682893572, - 'std': 0.7948191385132984, - }, - 'mean_of_attributes': { - 'count': 4, - 'kurtosis': 0.8595879081956515, - 'max': 5.843333333333335, - 'mean': 3.4636666666666684, - 'median': 3.406333333333335, - 'min': 1.1986666666666672, - 'quartile_1': 2.5901666666666676, - 'quartile_3': 4.279833333333335, - 'skewness': 0.17098811780721151, - 'std': 1.919017997329383, - }, - 'number_distinct_values_of_numeric_attributes': { - 'count': 4, - 'kurtosis': -3.0617196548227046, - 'max': 43, - 'mean': 30.75, - 'median': 29.0, - 'min': 22, - 'quartile_1': 22.75, - 'quartile_3': 37.0, - 'skewness': 0.5076458131399395, - 'std': 10.07885575516057, - }, - 'number_of_attributes': 4, - 'number_of_binary_attributes': 0, - 'number_of_categorical_attributes': 0, - 'number_of_discrete_attributes': 0, - 'number_of_instances': 150, - 'number_of_instances_with_missing_values': 0, - 'number_of_instances_with_present_values': 150, - 'number_of_numeric_attributes': 4, - 'number_of_other_attributes': 0, - 'number_of_string_attributes': 0, - 'ratio_of_binary_attributes': 0.0, - 'ratio_of_categorical_attributes': 0.0, - 'ratio_of_discrete_attributes': 0.0, - 'ratio_of_instances_with_missing_values': 0.0, - 'ratio_of_instances_with_present_values': 1.0, - 'ratio_of_numeric_attributes': 1.0, - 'ratio_of_other_attributes': 0.0, - 'ratio_of_string_attributes': 0.0, - 'skew_of_attributes': { - 'count': 4, - 'kurtosis': -4.4981774675194846, - 'max': 0.3340526621720866, - 'mean': 0.06737570104778733, - 'median': 0.10495719724642275, - 'min': -0.27446425247378287, - 'quartile_1': -0.1473634847265412, - 'quartile_3': 0.3196963830207513, - 'skewness': -0.25709026597426626, - 'std': 0.3049355425307816, - }, - 'standard_deviation_of_attributes': { - 'count': 4, - 'kurtosis': 2.65240266862979, - 'max': 1.7644204199522617, - 'mean': 0.9473104002482848, - 'median': 0.7956134348393522, - 'min': 0.4335943113621737, - 'quartile_1': 0.6807691341161745, - 'quartile_3': 1.0621547009714627, - 'skewness': 1.4362343455338735, - 'std': 0.5714610798918619, - } - })) - self.assertFalse('data_metafeatures' in dataframe.metadata.query_column(0)) - self.assertEqual(round_numbers(test_utils.convert_through_json(dataframe.metadata.query_column(1)['data_metafeatures'])), round_numbers({ - 'entropy_of_values': 1.525353510619575, - 'number_distinct_values': 35, - 'number_of_missing_values': 0, - 'number_of_negative_numeric_values': 0, - 'number_of_numeric_values': 150, - 'number_of_numeric_values_equal_-1': 0, - 'number_of_numeric_values_equal_0': 0, - 'number_of_numeric_values_equal_1': 0, - 'number_of_positive_numeric_values': 150, - 'number_of_present_values': 150, - 'ratio_of_missing_values': 0.0, - 'ratio_of_negative_numeric_values': 0.0, - 'ratio_of_numeric_values': 1.0, - 'ratio_of_numeric_values_equal_-1': 0.0, - 'ratio_of_numeric_values_equal_0': 0.0, - 'ratio_of_numeric_values_equal_1': 0.0, - 'ratio_of_positive_numeric_values': 1.0, - 'ratio_of_present_values': 1.0, - 'value_counts_aggregate': { - 'count': 5, - 'kurtosis': -0.46949652355057747, - 'max': 42, - 'mean': 30.0, - 'median': 32.0, - 'min': 11, - 'quartile_1': 24.0, - 'quartile_3': 41.0, - 'skewness': -0.7773115383470599, - 'std': 
12.90348790056394, - }, - 'value_probabilities_aggregate': { - 'count': 5, - 'kurtosis': -0.4694965235505757, - 'max': 0.28, - 'mean': 0.2, - 'median': 0.21333333333333335, - 'min': 0.07333333333333333, - 'quartile_1': 0.16, - 'quartile_3': 0.2733333333333333, - 'skewness': -0.7773115383470603, - 'std': 0.08602325267042626, - }, - 'values_aggregate': { - 'count': 150, - 'kurtosis': -0.5520640413156395, - 'max': 7.9, - 'mean': 5.843333333333335, - 'median': 5.8, - 'min': 4.3, - 'quartile_1': 5.1, - 'quartile_3': 6.4, - 'skewness': 0.3149109566369728, - 'std': 0.8280661279778629, - }, - })) - self.assertEqual(round_numbers(test_utils.convert_through_json(dataframe.metadata.query_column(2)['data_metafeatures'])), round_numbers({ - 'entropy_of_values': 1.2765851342825842, - 'number_distinct_values': 23, - 'number_of_missing_values': 0, - 'number_of_negative_numeric_values': 0, - 'number_of_numeric_values': 150, - 'number_of_numeric_values_equal_-1': 0, - 'number_of_numeric_values_equal_0': 0, - 'number_of_numeric_values_equal_1': 0, - 'number_of_positive_numeric_values': 150, - 'number_of_present_values': 150, - 'ratio_of_missing_values': 0.0, - 'ratio_of_negative_numeric_values': 0.0, - 'ratio_of_numeric_values': 1.0, - 'ratio_of_numeric_values_equal_-1': 0.0, - 'ratio_of_numeric_values_equal_0': 0.0, - 'ratio_of_numeric_values_equal_1': 0.0, - 'ratio_of_positive_numeric_values': 1.0, - 'ratio_of_present_values': 1.0, - 'value_counts_aggregate': { - 'count': 5, - 'kurtosis': -0.9899064888741496, - 'max': 69, - 'mean': 30.0, - 'median': 20.0, - 'min': 4, - 'quartile_1': 11.0, - 'quartile_3': 46.0, - 'skewness': 0.8048211570183503, - 'std': 26.99073915253156, - }, - 'value_probabilities_aggregate': { - 'count': 5, - 'kurtosis': -0.9899064888741478, - 'max': 0.46, - 'mean': 0.19999999999999998, - 'median': 0.13333333333333333, - 'min': 0.02666666666666667, - 'quartile_1': 0.07333333333333333, - 'quartile_3': 0.30666666666666664, - 'skewness': 0.8048211570183509, - 'std': 0.17993826101687704, - }, - 'values_aggregate': { - 'count': 150, - 'kurtosis': 0.2907810623654279, - 'max': 4.4, - 'mean': 3.0540000000000007, - 'median': 3.0, - 'min': 2.0, - 'quartile_1': 2.8, - 'quartile_3': 3.3, - 'skewness': 0.3340526621720866, - 'std': 0.4335943113621737, - }, - })) - self.assertEqual(round_numbers(test_utils.convert_through_json(dataframe.metadata.query_column(3)['data_metafeatures'])), round_numbers({ - 'entropy_of_values': 1.38322461535912, - 'number_distinct_values': 43, - 'number_of_missing_values': 0, - 'number_of_negative_numeric_values': 0, - 'number_of_numeric_values': 150, - 'number_of_numeric_values_equal_-1': 0, - 'number_of_numeric_values_equal_0': 0, - 'number_of_numeric_values_equal_1': 1, - 'number_of_positive_numeric_values': 150, - 'number_of_present_values': 150, - 'ratio_of_missing_values': 0.0, - 'ratio_of_negative_numeric_values': 0.0, - 'ratio_of_numeric_values': 1.0, - 'ratio_of_numeric_values_equal_-1': 0.0, - 'ratio_of_numeric_values_equal_0': 0.0, - 'ratio_of_numeric_values_equal_1': 0.006666666666666667, - 'ratio_of_positive_numeric_values': 1.0, - 'ratio_of_present_values': 1.0, - 'value_counts_aggregate': { - 'count': 5, - 'kurtosis': -1.875313335089766, - 'max': 50, - 'mean': 30.0, - 'median': 34.0, - 'min': 3, - 'quartile_1': 16.0, - 'quartile_3': 47.0, - 'skewness': -0.4786622161186872, - 'std': 20.18662923818635, - }, - 'value_probabilities_aggregate': { - 'count': 5, - 'kurtosis': -1.8753133350897668, - 'max': 0.3333333333333333, - 'mean': 0.2, - 'median': 
0.22666666666666666, - 'min': 0.02, - 'quartile_1': 0.10666666666666667, - 'quartile_3': 0.31333333333333335, - 'skewness': -0.4786622161186876, - 'std': 0.13457752825457567, - }, - 'values_aggregate': { - 'count': 150, - 'kurtosis': -1.4019208006454036, - 'max': 6.9, - 'mean': 3.7586666666666693, - 'median': 4.35, - 'min': 1.0, - 'quartile_1': 1.6, - 'quartile_3': 5.1, - 'skewness': -0.27446425247378287, - 'std': 1.7644204199522617, - }, - })) - self.assertEqual(round_numbers(test_utils.convert_through_json(dataframe.metadata.query_column(4)['data_metafeatures'])), round_numbers({ - 'entropy_of_values': 1.4815744426848276, - 'number_distinct_values': 22, - 'number_of_missing_values': 0, - 'number_of_negative_numeric_values': 0, - 'number_of_numeric_values': 150, - 'number_of_numeric_values_equal_-1': 0, - 'number_of_numeric_values_equal_0': 0, - 'number_of_numeric_values_equal_1': 7, - 'number_of_positive_numeric_values': 150, - 'number_of_present_values': 150, - 'ratio_of_missing_values': 0.0, - 'ratio_of_negative_numeric_values': 0.0, - 'ratio_of_numeric_values': 1.0, - 'ratio_of_numeric_values_equal_-1': 0.0, - 'ratio_of_numeric_values_equal_0': 0.0, - 'ratio_of_numeric_values_equal_1': 0.04666666666666667, - 'ratio_of_positive_numeric_values': 1.0, - 'ratio_of_present_values': 1.0, - 'value_counts_aggregate': { - 'count': 5, - 'kurtosis': -0.6060977121954245, - 'max': 49, - 'mean': 30.0, - 'median': 29.0, - 'min': 8, - 'quartile_1': 23.0, - 'quartile_3': 41.0, - 'skewness': -0.28840734350346464, - 'std': 15.937377450509228, - }, - 'value_probabilities_aggregate': { - 'count': 5, - 'kurtosis': -0.606097712195421, - 'max': 0.32666666666666666, - 'mean': 0.2, - 'median': 0.19333333333333333, - 'min': 0.05333333333333334, - 'quartile_1': 0.15333333333333332, - 'quartile_3': 0.2733333333333333, - 'skewness': -0.2884073435034653, - 'std': 0.10624918300339484, - }, - 'values_aggregate': { - 'count': 150, - 'kurtosis': -1.3397541711393433, - 'max': 2.5, - 'mean': 1.1986666666666672, - 'median': 1.3, - 'min': 0.1, - 'quartile_1': 0.3, - 'quartile_3': 1.8, - 'skewness': -0.10499656214412734, - 'std': 0.7631607417008414, - }, - })) - self.assertEqual(round_numbers(test_utils.convert_through_json(dataframe.metadata.query_column(5)['data_metafeatures'])), round_numbers({ - 'default_accuracy': 0.3333333333333333, - 'entropy_of_values': 1.0986122886681096, - 'equivalent_number_of_numeric_attributes': 1.7538156960944151, - 'joint_entropy_of_attributes': { - 'count': 4, - 'kurtosis': -4.468260105522818, - 'max': 0.9180949375453917, - 'mean': 0.6264126219845205, - 'median': 0.6607409495199184, - 'min': 0.26607365135285327, - 'quartile_1': 0.3993550878466134, - 'quartile_3': 0.8877984836578254, - 'skewness': -0.24309705749856694, - 'std': 0.3221913428169348, - }, - 'joint_entropy_of_numeric_attributes': { - 'count': 4, - 'kurtosis': -5.533056612798099, - 'max': 2.1801835659431514, - 'mean': 1.8888840924201158, - 'median': 1.8856077827026931, - 'min': 1.604137238331926, - 'quartile_1': 1.6476031549386407, - 'quartile_3': 2.1268887201841684, - 'skewness': 0.01639056780792744, - 'std': 0.29770030633854977, - }, - 'mutual_information_of_numeric_attributes': { - 'count': 4, - 'kurtosis': -4.468260105522818, - 'max': 0.9180949375453917, - 'mean': 0.6264126219845205, - 'median': 0.6607409495199184, - 'min': 0.26607365135285327, - 'quartile_1': 0.3993550878466134, - 'quartile_3': 0.8877984836578254, - 'skewness': -0.24309705749856694, - 'std': 0.3221913428169348, - }, - 'number_distinct_values': 3, - 
'number_of_missing_values': 0, - 'number_of_present_values': 150, - 'numeric_noise_to_signal_ratio': 1.2615834611511623, - 'ratio_of_missing_values': 0.0, - 'ratio_of_present_values': 1.0, - 'value_counts_aggregate': { - 'count': 3, - 'max': 50, - 'mean': 50.0, - 'median': 50.0, - 'min': 50, - 'quartile_1': 50.0, - 'quartile_3': 50.0, - 'skewness': 0, - 'std': 0.0, - }, - 'value_probabilities_aggregate': { - 'count': 3, - 'max': 0.3333333333333333, - 'mean': 0.3333333333333333, - 'median': 0.3333333333333333, - 'min': 0.3333333333333333, - 'quartile_1': 0.3333333333333333, - 'quartile_3': 0.3333333333333333, - 'skewness': 0, - 'std': 0.0, - }, - })) - - def test_database_with_parsed_categorical_columns(self): - self.maxDiff = None - - dataframe = self._get_database(True) - - hyperparams_class = compute_metafeatures.ComputeMetafeaturesPrimitive.metadata.get_hyperparams() - primitive = compute_metafeatures.ComputeMetafeaturesPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - self._test_database_metafeatures(dataframe.metadata, True) - - def test_database_without_parsed_categorical_columns(self): - self.maxDiff = None - - dataframe = self._get_database(False) - - hyperparams_class = compute_metafeatures.ComputeMetafeaturesPrimitive.metadata.get_hyperparams() - primitive = compute_metafeatures.ComputeMetafeaturesPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - self._test_database_metafeatures(dataframe.metadata, False) - - def _test_database_metafeatures(self, metadata, parse_categorical_columns): - expected_metafeatures = { - 'attribute_counts_by_semantic_type': { - 'http://schema.org/DateTime': 1, - 'http://schema.org/Integer': 1, - 'http://schema.org/Text': 2, - 'https://metadata.datadrivendiscovery.org/types/Attribute': 6, - 'https://metadata.datadrivendiscovery.org/types/CategoricalData': 2, - }, - 'attribute_counts_by_structural_type': { - 'float': 2, - 'str': 4, - }, - 'attribute_ratios_by_semantic_type': { - 'http://schema.org/DateTime': 0.16666666666666666, - 'http://schema.org/Integer': 0.16666666666666666, - 'http://schema.org/Text': 0.3333333333333333, - 'https://metadata.datadrivendiscovery.org/types/Attribute': 1.0, - 'https://metadata.datadrivendiscovery.org/types/CategoricalData': 0.3333333333333333, - }, - 'attribute_ratios_by_structural_type': { - 'float': 0.3333333333333333, - 'str': 0.6666666666666666, - }, - 'dimensionality': 0.13333333333333333, - 'entropy_of_attributes': { - 'count': 4, - 'kurtosis': 1.5975414707531783, - 'max': 1.6094379124341005, - 'mean': 1.1249524175825663, - 'median': 1.0986122886681096, - 'min': 0.6931471805599453, - 'quartile_1': 0.9972460116410685, - 'quartile_3': 1.2263186946096072, - 'skewness': 0.4183300365459641, - 'std': 0.3753085673700856, - }, - 'entropy_of_categorical_attributes': { - 'count': 2, - 'max': 1.6094379124341005, - 'mean': 1.354025100551105, - 'median': 1.354025100551105, - 'min': 1.0986122886681096, - 'quartile_1': 1.2263186946096072, - 'quartile_3': 1.4817315064926029, - 'std': 0.3612082625687802, - }, - 'entropy_of_discrete_attributes': { - 'count': 2, - 'max': 1.0986122886681096, - 'mean': 0.8958797346140275, - 'median': 0.8958797346140275, - 'min': 0.6931471805599453, - 'quartile_1': 0.7945134575869863, - 'quartile_3': 0.9972460116410685, - 'std': 0.28670712747781957, - }, - 'entropy_of_numeric_attributes': { - 'count': 2, - 'max': 1.0986122886681096, - 'mean': 0.8958797346140275, - 'median': 
0.8958797346140275, - 'min': 0.6931471805599453, - 'quartile_1': 0.7945134575869863, - 'quartile_3': 0.9972460116410685, - 'std': 0.28670712747781957, - }, - 'kurtosis_of_attributes': { - 'count': 2, - 'max': -1.5348837209302326, - 'mean': -1.8415159345391905, - 'median': -1.8415159345391905, - 'min': -2.1481481481481484, - 'quartile_1': -1.9948320413436693, - 'quartile_3': -1.6881998277347114, - 'std': 0.4336434351462721, - }, - 'mean_of_attributes': { - 'count': 2, - 'max': 946713600.0, - 'mean': 473356800.75, - 'median': 473356800.75, - 'min': 1.5, - 'quartile_1': 236678401.125, - 'quartile_3': 710035200.375, - 'std': 669427605.3408685, - }, - 'number_distinct_values_of_categorical_attributes': { - 'count': 2, - 'max': 5, - 'mean': 4.0, - 'median': 4.0, - 'min': 3, - 'quartile_1': 3.5, - 'quartile_3': 4.5, - 'std': 1.4142135623730951, - }, - 'number_distinct_values_of_discrete_attributes': { - 'count': 2, - 'max': 3, - 'mean': 2.5, - 'median': 2.5, - 'min': 2, - 'quartile_1': 2.25, - 'quartile_3': 2.75, - 'std': 0.7071067811865476, - }, - 'number_distinct_values_of_numeric_attributes': { - 'count': 2, - 'max': 3, - 'mean': 2.5, - 'median': 2.5, - 'min': 2, - 'quartile_1': 2.25, - 'quartile_3': 2.75, - 'std': 0.7071067811865476, - }, - 'number_of_attributes': 6, - 'number_of_binary_attributes': 1, - 'number_of_categorical_attributes': 2, - 'number_of_discrete_attributes': 2, - 'number_of_instances': 45, - 'number_of_instances_with_missing_values': 15, - 'number_of_instances_with_present_values': 45, - 'number_of_numeric_attributes': 2, - 'number_of_other_attributes': 0, - 'number_of_string_attributes': 2, - 'ratio_of_binary_attributes': 0.16666666666666666, - 'ratio_of_categorical_attributes': 0.3333333333333333, - 'ratio_of_discrete_attributes': 0.3333333333333333, - 'ratio_of_instances_with_missing_values': 0.3333333333333333, - 'ratio_of_instances_with_present_values': 1.0, - 'ratio_of_numeric_attributes': 0.3333333333333333, - 'ratio_of_other_attributes': 0.0, - 'ratio_of_string_attributes': 0.3333333333333333, - 'skew_of_attributes': { - 'count': 2, - 'max': 0.00017349603091112943, - 'mean': 8.674801545556472e-05, - 'median': 8.674801545556472e-05, - 'min': 0.0, - 'quartile_1': 4.337400772778236e-05, - 'quartile_3': 0.00013012202318334707, - 'std': 0.0001226802199662105, - }, - 'standard_deviation_of_attributes': { - 'count': 2, - 'max': 260578306.67149138, - 'mean': 130289153.59001951, - 'median': 130289153.59001951, - 'min': 0.5085476277156078, - 'quartile_1': 65144577.049283564, - 'quartile_3': 195433730.13075545, - 'std': 184256687.31792185, - }, - } - - if parse_categorical_columns: - expected_metafeatures['attribute_counts_by_structural_type'] = { - 'float': 2, - 'int': 2, - 'str': 2, - } - expected_metafeatures['attribute_ratios_by_structural_type'] = { - 'float': 0.3333333333333333, - 'int': 0.3333333333333333, - 'str': 0.3333333333333333, - } - - self.assertEqual(round_numbers(test_utils.convert_through_json(metadata.query(())['data_metafeatures'])), round_numbers(expected_metafeatures)) - self.assertFalse('data_metafeatures' in metadata.query_column(0)) - - expected_metafeatures = { - 'entropy_of_values': 1.0986122886681096, - 'number_distinct_values': 3, - 'number_of_missing_values': 0, - 'number_of_present_values': 45, - 'ratio_of_missing_values': 0.0, - 'ratio_of_present_values': 1.0, - 'value_counts_aggregate': { - 'count': 3, - 'max': 15, - 'mean': 15.0, - 'median': 15.0, - 'min': 15, - 'quartile_1': 15.0, - 'quartile_3': 15.0, - 'skewness': 0, - 'std': 0.0, - }, - 
'value_probabilities_aggregate': { - 'count': 3, - 'max': 0.3333333333333333, - 'mean': 0.3333333333333333, - 'median': 0.3333333333333333, - 'min': 0.3333333333333333, - 'quartile_1': 0.3333333333333333, - 'quartile_3': 0.3333333333333333, - 'skewness': 0, - 'std': 0.0, - }, - } - - if parse_categorical_columns: - expected_metafeatures['values_aggregate'] = { - 'count': 45, - 'kurtosis': -1.5348837209302337, - 'max': 3183890296585507471, - 'mean': 1.3152606765673695e+18, - 'median': 5.866629697275507e+17, - 'min': 175228763389048878, - 'quartile_1': 1.7522876338904886e+17, - 'quartile_3': 3.1838902965855073e+18, - 'skewness': 0.679711376572956, - 'std': 1.3470047628846746e+18, - } - - self.assertEqual(round_numbers(test_utils.convert_through_json(metadata.query_column(1)['data_metafeatures'])), round_numbers(expected_metafeatures)) - self.assertEqual(round_numbers(test_utils.convert_through_json(metadata.query_column(2)['data_metafeatures'])), round_numbers({ - 'number_of_missing_values': 0, - 'number_of_present_values': 45, - 'ratio_of_missing_values': 0.0, - 'ratio_of_present_values': 1.0, - })) - self.assertEqual(round_numbers(test_utils.convert_through_json(metadata.query_column(3)['data_metafeatures'])), round_numbers({ - 'entropy_of_values': 0.6931471805599453, - 'number_distinct_values': 2, - 'number_of_missing_values': 15, - 'number_of_negative_numeric_values': 0, - 'number_of_numeric_values': 30, - 'number_of_numeric_values_equal_-1': 0, - 'number_of_numeric_values_equal_0': 0, - 'number_of_numeric_values_equal_1': 15, - 'number_of_positive_numeric_values': 30, - 'number_of_present_values': 30, - 'ratio_of_missing_values': 0.3333333333333333, - 'ratio_of_negative_numeric_values': 0.0, - 'ratio_of_numeric_values': 0.6666666666666666, - 'ratio_of_numeric_values_equal_-1': 0.0, - 'ratio_of_numeric_values_equal_0': 0.0, - 'ratio_of_numeric_values_equal_1': 0.3333333333333333, - 'ratio_of_positive_numeric_values': 0.6666666666666666, - 'ratio_of_present_values': 0.6666666666666666, - 'value_counts_aggregate': { - 'count': 2, - 'max': 15, - 'mean': 15.0, - 'median': 15.0, - 'min': 15, - 'quartile_1': 15.0, - 'quartile_3': 15.0, - 'std': 0.0, - }, - 'value_probabilities_aggregate': { - 'count': 2, - 'max': 0.5, - 'mean': 0.5, - 'median': 0.5, - 'min': 0.5, - 'quartile_1': 0.5, - 'quartile_3': 0.5, - 'std': 0.0, - }, - 'values_aggregate': { - 'count': 30, - 'kurtosis': -2.1481481481481484, - 'max': 2.0, - 'mean': 1.5, - 'median': 1.5, - 'min': 1.0, - 'quartile_1': 1.0, - 'quartile_3': 2.0, - 'skewness': 0.0, - 'std': 0.5085476277156078, - }, - })) - self.assertEqual(round_numbers(test_utils.convert_through_json(metadata.query_column(4)['data_metafeatures'])), round_numbers({ - 'number_of_missing_values': 0, - 'number_of_present_values': 45, - 'ratio_of_missing_values': 0.0, - 'ratio_of_present_values': 1.0, - })) - - expected_metafeatures = { - 'entropy_of_values': 1.6094379124341005, - 'number_distinct_values': 5, - 'number_of_missing_values': 0, - 'number_of_present_values': 45, - 'ratio_of_missing_values': 0.0, - 'ratio_of_present_values': 1.0, - 'value_counts_aggregate': { - 'count': 5, - 'kurtosis': 0, - 'max': 9, - 'mean': 9.0, - 'median': 9.0, - 'min': 9, - 'quartile_1': 9.0, - 'quartile_3': 9.0, - 'skewness': 0, - 'std': 0.0, - }, - 'value_probabilities_aggregate': { - 'count': 5, - 'kurtosis': 0, - 'max': 0.2, - 'mean': 0.2, - 'median': 0.2, - 'min': 0.2, - 'quartile_1': 0.2, - 'quartile_3': 0.2, - 'skewness': 0, - 'std': 0.0, - }, - } - - if parse_categorical_columns: - 
expected_metafeatures['values_aggregate'] = { - 'count': 45, - 'kurtosis': -0.8249445297886884, - 'max': 17926897368031380755, - 'mean': 1.1617029581691474e+19, - 'median': 1.1818891258207388e+19, - 'min': 4819821729471251610, - 'quartile_1': 9.804127312560234e+18, - 'quartile_3': 1.3715410240187093e+19, - 'skewness': -0.15176089654708094, - 'std': 4.378987201456074e+18, - } - - self.assertEqual(round_numbers(test_utils.convert_through_json(metadata.query_column(5)['data_metafeatures'])), round_numbers(expected_metafeatures)) - self.assertEqual(round_numbers(test_utils.convert_through_json(metadata.query_column(6)['data_metafeatures'])), round_numbers({ - 'entropy_of_values': 1.0986122886681096, - 'number_distinct_values': 3, - 'number_of_missing_values': 0, - 'number_of_negative_numeric_values': 0, - 'number_of_numeric_values': 45, - 'number_of_numeric_values_equal_-1': 0, - 'number_of_numeric_values_equal_0': 0, - 'number_of_numeric_values_equal_1': 0, - 'number_of_positive_numeric_values': 45, - 'number_of_present_values': 45, - 'ratio_of_missing_values': 0.0, - 'ratio_of_negative_numeric_values': 0.0, - 'ratio_of_numeric_values': 1.0, - 'ratio_of_numeric_values_equal_-1': 0.0, - 'ratio_of_numeric_values_equal_0': 0.0, - 'ratio_of_numeric_values_equal_1': 0.0, - 'ratio_of_positive_numeric_values': 1.0, - 'ratio_of_present_values': 1.0, - 'value_counts_aggregate': { - 'count': 3, - 'max': 15, - 'mean': 15.0, - 'median': 15.0, - 'min': 15, - 'quartile_1': 15.0, - 'quartile_3': 15.0, - 'skewness': 0, - 'std': 0.0, - }, - 'value_probabilities_aggregate': { - 'count': 3, - 'max': 0.3333333333333333, - 'mean': 0.3333333333333333, - 'median': 0.3333333333333333, - 'min': 0.3333333333333333, - 'quartile_1': 0.3333333333333333, - 'quartile_3': 0.3333333333333333, - 'skewness': 0, - 'std': 0.0, - }, - 'values_aggregate': { - 'count': 45, - 'kurtosis': -1.5348837209302326, - 'max': 1262304000.0, - 'mean': 946713600.0, - 'median': 946684800.0, - 'min': 631152000.0, - 'quartile_1': 631152000.0, - 'quartile_3': 1262304000.0, - 'skewness': 0.00017349603091112943, - 'std': 260578306.67149138, - }, - })) - - expected_metafeatures = { - 'categorical_noise_to_signal_ratio': 6.856024896846719, - 'discrete_noise_to_signal_ratio': 16.280596971377722, - 'entropy_of_values': 1.2922333886497557, - 'equivalent_number_of_attributes': 7.497510695804063, - 'equivalent_number_of_categorical_attributes': 7.497510695804063, - 'equivalent_number_of_discrete_attributes': 24.925850557201, - 'equivalent_number_of_numeric_attributes': 24.925850557201, - 'joint_entropy_of_attributes': { - 'count': 4, - 'kurtosis': 3.8310594212937232, - 'max': 0.27405736318703244, - 'mean': 0.11209904602421886, - 'median': 0.06401513288957879, - 'min': 0.04630855513068542, - 'quartile_1': 0.05461037397689525, - 'quartile_3': 0.12150380493690241, - 'skewness': 1.949786087429789, - 'std': 0.10842988984399864, - }, - 'joint_entropy_of_categorical_attributes': { - 'count': 2, - 'max': 2.6276139378968235, - 'mean': 2.473903498180581, - 'median': 2.473903498180581, - 'min': 2.3201930584643393, - 'quartile_1': 2.3970482783224605, - 'quartile_3': 2.5507587180387024, - 'std': 0.2173793885250416, - }, - 'joint_entropy_of_discrete_attributes': { - 'count': 2, - 'max': 2.3334680303922335, - 'mean': 2.139600733638498, - 'median': 2.139600733638498, - 'min': 1.945733436884763, - 'quartile_1': 2.0426670852616304, - 'quartile_3': 2.236534382015366, - 'std': 0.2741697603697419, - }, - 'joint_entropy_of_numeric_attributes': { - 'count': 2, - 'max': 
2.3334680303922335, - 'mean': 2.139600733638498, - 'median': 2.139600733638498, - 'min': 1.945733436884763, - 'quartile_1': 2.0426670852616304, - 'quartile_3': 2.236534382015366, - 'std': 0.2741697603697419, - }, - 'mutual_information_of_attributes': { - 'count': 2, - 'max': 0.27405736318703244, - 'mean': 0.17235499102027907, - 'median': 0.17235499102027907, - 'min': 0.07065261885352572, - 'quartile_1': 0.12150380493690241, - 'quartile_3': 0.22320617710365576, - 'std': 0.1438288740437386, - }, - 'mutual_information_of_categorical_attributes': { - 'count': 2, - 'max': 0.27405736318703244, - 'mean': 0.17235499102027907, - 'median': 0.17235499102027907, - 'min': 0.07065261885352572, - 'quartile_1': 0.12150380493690241, - 'quartile_3': 0.22320617710365576, - 'std': 0.1438288740437386, - }, - 'mutual_information_of_discrete_attributes': { - 'count': 2, - 'max': 0.05737764692563185, - 'mean': 0.05184310102815864, - 'median': 0.05184310102815864, - 'min': 0.04630855513068542, - 'quartile_1': 0.049075828079422026, - 'quartile_3': 0.05461037397689525, - 'std': 0.007827029869782995, - }, - 'mutual_information_of_numeric_attributes': { - 'count': 2, - 'max': 0.05737764692563185, - 'mean': 0.05184310102815864, - 'median': 0.05184310102815864, - 'min': 0.04630855513068542, - 'quartile_1': 0.049075828079422026, - 'quartile_3': 0.05461037397689525, - 'std': 0.007827029869782995, - }, - 'noise_to_signal_ratio': 5.526950051885679, - 'number_distinct_values': 45, - 'number_of_missing_values': 0, - 'number_of_negative_numeric_values': 0, - 'number_of_numeric_values': 45, - 'number_of_numeric_values_equal_-1': 0, - 'number_of_numeric_values_equal_0': 0, - 'number_of_numeric_values_equal_1': 0, - 'number_of_positive_numeric_values': 45, - 'number_of_present_values': 45, - 'numeric_noise_to_signal_ratio': 16.280596971377722, - 'ratio_of_missing_values': 0.0, - 'ratio_of_negative_numeric_values': 0.0, - 'ratio_of_numeric_values': 1.0, - 'ratio_of_numeric_values_equal_-1': 0.0, - 'ratio_of_numeric_values_equal_0': 0.0, - 'ratio_of_numeric_values_equal_1': 0.0, - 'ratio_of_positive_numeric_values': 1.0, - 'ratio_of_present_values': 1.0, - 'value_counts_aggregate': { - 'count': 4, - 'kurtosis': 0.2795705816375573, - 'max': 19, - 'mean': 11.25, - 'median': 10.0, - 'min': 6, - 'quartile_1': 7.5, - 'quartile_3': 13.75, - 'skewness': 1.0126926768695854, - 'std': 5.737304826019502, - }, - 'value_probabilities_aggregate': { - 'count': 4, - 'kurtosis': 0.2795705816375609, - 'max': 0.4222222222222222, - 'mean': 0.25, - 'median': 0.2222222222222222, - 'min': 0.13333333333333333, - 'quartile_1': 0.16666666666666666, - 'quartile_3': 0.3055555555555556, - 'skewness': 1.0126926768695859, - 'std': 0.12749566280043337, - }, - 'values_aggregate': { - 'count': 45, - 'kurtosis': -1.376558337329924, - 'max': 70.8170731707317, - 'mean': 54.363425575007106, - 'median': 53.6699876392329, - 'min': 32.328512195122, - 'quartile_1': 45.648691933945, - 'quartile_3': 65.5693658536586, - 'skewness': -0.11742803570367141, - 'std': 11.607381033992365, - }, - } - - if parse_categorical_columns: - # Because the order of string values is different from the order of encoded values, - # the numbers are slightly different between parsed and not parsed cases. 
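# The 'entropy_of_values' figures asserted throughout these tests are
# natural-log (nats) Shannon entropies of a column's value distribution; a
# hedged standalone sketch with numpy. The balanced iris target (three
# species, 50 rows each) is assumed as input, which yields
# ln(3) ~= 1.0986122886681096, the constant used in the expectations.
import numpy


def entropy_of_values(values) -> float:
    _, counts = numpy.unique(values, return_counts=True)
    probabilities = counts / counts.sum()
    return float(-(probabilities * numpy.log(probabilities)).sum())


species = ['setosa'] * 50 + ['versicolor'] * 50 + ['virginica'] * 50
assert abs(entropy_of_values(species) - 1.0986122886681096) < 1e-12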
- expected_metafeatures['joint_entropy_of_categorical_attributes'] = { - 'count': 2, - 'max': 2.6276139378968226, - 'mean': 2.473903498180581, - 'median': 2.473903498180581, - 'min': 2.3201930584643393, - 'quartile_1': 2.39704827832246, - 'quartile_3': 2.550758718038702, - 'std': 0.217379388525041, - } - expected_metafeatures['joint_entropy_of_attributes'] = { - 'count': 4, - 'kurtosis': 3.8310594212937232, - 'max': 0.27405736318703244, - 'mean': 0.11209904602421886, - 'median': 0.06401513288957879, - 'min': 0.04630855513068542, - 'quartile_1': 0.05461037397689525, - 'quartile_3': 0.12150380493690241, - 'skewness': 1.949786087429789, - 'std': 0.10842988984399864, - } - - self.assertEqual(round_numbers(test_utils.convert_through_json(metadata.query_column(7)['data_metafeatures'])), round_numbers(expected_metafeatures)) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_construct_predictions.py b/common-primitives/tests/test_construct_predictions.py deleted file mode 100644 index 531d711..0000000 --- a/common-primitives/tests/test_construct_predictions.py +++ /dev/null @@ -1,233 +0,0 @@ -import copy -import os -import unittest - -import numpy - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, construct_predictions, extract_columns_semantic_types - -import utils as test_utils - - -class ConstructPredictionsPrimitiveTestCase(unittest.TestCase): - # TODO: Make this part of metadata API. - # Something like setting a semantic type for given columns. - def _mark_all_targets(self, dataset, targets): - for target in targets: - dataset.metadata = dataset.metadata.add_semantic_type((target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type((target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type((target['resource_id'], metadata_base.ALL_ELEMENTS, target['column_index']), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - def _get_iris_dataframe(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - self._mark_all_targets(dataset, [{'resource_id': 'learningData', 'column_index': 5}]) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - return dataframe - - def test_correct_order(self): - dataframe = self._get_iris_dataframe() - - hyperparams_class = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams() - - # We extract both the primary index and targets. So it is in the output format already. 
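# The contract exercised below is that ConstructPredictionsPrimitive always
# returns ['d3mIndex', <predicted targets>] in the reference row order, even
# when the input columns were shuffled. A hedged plain-pandas sketch of just
# the reordering step (illustrative data; the real primitive also rewrites
# D3M metadata):
import pandas

reference = pandas.DataFrame({'d3mIndex': [0, 1, 2], 'species': ['a', 'b', 'c']})
shuffled = reference[['species', 'd3mIndex']]  # columns switched around

predictions = shuffled[['d3mIndex', 'species']]  # restore index-first layout
assert list(predictions.columns) == ['d3mIndex', 'species']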
- primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'https://metadata.datadrivendiscovery.org/types/Target',)})) - - call_metadata = primitive.produce(inputs=dataframe) - - targets = call_metadata.value - - # We pretend these are our predictions. - targets.metadata = targets.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - targets.metadata = targets.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget') - - # We switch columns around. - targets = targets.select_columns([1, 0]) - - hyperparams_class = construct_predictions.ConstructPredictionsPrimitive.metadata.get_hyperparams() - - construct_primitive = construct_predictions.ConstructPredictionsPrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = construct_primitive.produce(inputs=targets, reference=dataframe) - - dataframe = call_metadata.value - - self.assertEqual(list(dataframe.columns), ['d3mIndex', 'species']) - - self._test_metadata(dataframe.metadata) - - def test_all_columns(self): - dataframe = self._get_iris_dataframe() - - # We use all columns. Output has to be just index and targets. - targets = copy.copy(dataframe) - - # We pretend these are our predictions. - targets.metadata = targets.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - targets.metadata = targets.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget') - - hyperparams_class = construct_predictions.ConstructPredictionsPrimitive.metadata.get_hyperparams() - - construct_primitive = construct_predictions.ConstructPredictionsPrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = construct_primitive.produce(inputs=targets, reference=dataframe) - - dataframe = call_metadata.value - - self.assertEqual(list(dataframe.columns), ['d3mIndex', 'species']) - - self._test_metadata(dataframe.metadata) - - def test_missing_index(self): - dataframe = self._get_iris_dataframe() - - # We just use all columns. - targets = copy.copy(dataframe) - - # We pretend these are our predictions. - targets.metadata = targets.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - targets.metadata = targets.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget') - - # Remove primary index. This one has to be reconstructed. - targets = targets.remove_columns([0]) - - hyperparams_class = construct_predictions.ConstructPredictionsPrimitive.metadata.get_hyperparams() - - construct_primitive = construct_predictions.ConstructPredictionsPrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = construct_primitive.produce(inputs=targets, reference=dataframe) - - dataframe = call_metadata.value - - self.assertEqual(list(dataframe.columns), ['d3mIndex', 'species']) - - self._test_metadata(dataframe.metadata) - - def test_just_targets_no_metadata(self): - dataframe = self._get_iris_dataframe() - - hyperparams_class = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams() - - # We extract just targets. 
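# When predictions arrive without the primary index, as in test_missing_index
# above, the primitive has to reconstruct 'd3mIndex' from the reference frame
# by row position. A hedged pandas approximation of that recovery step, with
# illustrative data standing in for the iris dataframe:
import pandas

reference = pandas.DataFrame({'d3mIndex': [0, 1, 2], 'sepalLength': [5.1, 4.9, 4.7]})
targets_only = pandas.DataFrame({'species': ['a', 'b', 'c']})

rebuilt = pandas.concat([reference[['d3mIndex']], targets_only], axis=1)
assert list(rebuilt.columns) == ['d3mIndex', 'species']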
- primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Target',)})) - - call_metadata = primitive.produce(inputs=dataframe) - - targets = call_metadata.value - - # Remove all metadata. - targets.metadata = metadata_base.DataMetadata().generate(targets) - - hyperparams_class = construct_predictions.ConstructPredictionsPrimitive.metadata.get_hyperparams() - - construct_primitive = construct_predictions.ConstructPredictionsPrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = construct_primitive.produce(inputs=targets, reference=dataframe) - - dataframe = call_metadata.value - - self.assertEqual(list(dataframe.columns), ['d3mIndex', 'species']) - - self._test_metadata(dataframe.metadata, True) - - def _test_metadata(self, metadata, no_metadata=False): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 2, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - if no_metadata: - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1))), { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget', - ], - }) - - else: - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1))), { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget', - ], - }) - - def test_float_vector(self): - dataframe = container.DataFrame({ - 'd3mIndex': [0], - 'target': [container.ndarray(numpy.array([3,5,9,10]))], - }, generate_metadata=True) - - # Update metadata. 
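# test_float_vector expects a vector-valued prediction cell to come back as
# a comma-joined string ('3,5,9,10'). A hedged sketch of that serialization
# with plain numpy; the separator and str() formatting match the assertion
# below, but the real primitive drives this through D3M metadata.
import numpy

vector = numpy.array([3, 5, 9, 10])
cell = ','.join(str(value) for value in vector)
assert cell == '3,5,9,10'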
- dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/PrimaryKey') - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget') - - hyperparams_class = construct_predictions.ConstructPredictionsPrimitive.metadata.get_hyperparams() - - construct_primitive = construct_predictions.ConstructPredictionsPrimitive(hyperparams=hyperparams_class.defaults()) - - dataframe = construct_primitive.produce(inputs=dataframe, reference=dataframe).value - - self.assertEqual(list(dataframe.columns), ['d3mIndex', 'target']) - - self.assertEqual(dataframe.values.tolist(), [ - [0, '3,5,9,10'], - ]) - - self.assertEqual(dataframe.metadata.query_column(1), { - 'structural_type': str, - 'name': 'target', - 'semantic_types': ( - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget', - ), - }) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_csv_reader.py b/common-primitives/tests/test_csv_reader.py deleted file mode 100644 index 3430c33..0000000 --- a/common-primitives/tests/test_csv_reader.py +++ /dev/null @@ -1,50 +0,0 @@ -import unittest -import os - -from d3m import container - -from common_primitives import dataset_to_dataframe, csv_reader - - -class CSVReaderPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_2', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults().replace({'dataframe_resource': '0'})) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - csv_hyperparams_class = csv_reader.CSVReaderPrimitive.metadata.get_hyperparams() - csv_primitive = csv_reader.CSVReaderPrimitive(hyperparams=csv_hyperparams_class.defaults().replace({'return_result': 'replace'})) - tables = csv_primitive.produce(inputs=dataframe).value - - self.assertEqual(tables.shape, (5, 1)) - - self._test_metadata(tables.metadata) - - def _test_metadata(self, metadata): - self.assertEqual(metadata.query_column(0)['structural_type'], container.DataFrame) - self.assertEqual(metadata.query_column(0)['semantic_types'], ('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'https://metadata.datadrivendiscovery.org/types/Timeseries', 'https://metadata.datadrivendiscovery.org/types/Table')) - - self.assertEqual(metadata.query_column(0, at=(0, 0)), { - 'structural_type': str, - 'name': 'time', - 'semantic_types': ( - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Time', - ) - }) - self.assertEqual(metadata.query_column(1, at=(0, 0)), { - 'structural_type': str, - 'name': 'value', - 'semantic_types': ( - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ) - }) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_cut_audio.py b/common-primitives/tests/test_cut_audio.py deleted file mode 100644 index da8282a..0000000 --- a/common-primitives/tests/test_cut_audio.py +++ /dev/null @@ -1,122 +0,0 @@ -import unittest -import os - -from d3m import container - 
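# The shape change asserted below -- (4410, 1) in, (44, 1) out at a 44100 Hz
# sampling rate -- is plain interval arithmetic: CutAudioPrimitive trims each
# clip to samples[start * rate : end * rate] using the boundary columns. A
# hedged standalone sketch; the 0.0-0.001 s interval is an assumption chosen
# to reproduce 44 samples, not a value read from the dataset itself.
import numpy

rate = 44100
clip = numpy.zeros((4410, 1))  # 0.1 s of mono audio

start, end = 0.0, 0.001  # interval boundaries, in seconds
cut = clip[int(start * rate):int(end * rate)]
assert cut.shape == (44, 1)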
-from common_primitives import audio_reader, cut_audio, dataset_to_dataframe, denormalize, column_parser - - -class AudioReaderPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'audio_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - denormalize_hyperparams_class = denormalize.DenormalizePrimitive.metadata.get_hyperparams() - denormalize_primitive = denormalize.DenormalizePrimitive(hyperparams=denormalize_hyperparams_class.defaults()) - dataset = denormalize_primitive.produce(inputs=dataset).value - - dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults()) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - column_parser_hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - column_parser_primitive = column_parser.ColumnParserPrimitive(hyperparams=column_parser_hyperparams_class.defaults()) - dataframe = column_parser_primitive.produce(inputs=dataframe).value - - audio_hyperparams_class = audio_reader.AudioReaderPrimitive.metadata.get_hyperparams() - audio_primitive = audio_reader.AudioReaderPrimitive(hyperparams=audio_hyperparams_class.defaults()) - dataframe = audio_primitive.produce(inputs=dataframe).value - - self.assertEqual(dataframe.iloc[0, 1], 'test_audio.mp3') - self.assertEqual(dataframe.iloc[0, 5].shape, (4410, 1)) - - cut_audio_hyperparams_class = cut_audio.CutAudioPrimitive.metadata.get_hyperparams() - cut_audio_primitive = cut_audio.CutAudioPrimitive(hyperparams=cut_audio_hyperparams_class.defaults()) - dataframe = cut_audio_primitive.produce(inputs=dataframe).value - - self.assertEqual(dataframe.iloc[0, 1], 'test_audio.mp3') - self.assertEqual(dataframe.iloc[0, 5].shape, (44, 1)) - - self._test_metadata(dataframe.metadata, False) - - def _test_metadata(self, dataframe_metadata, is_can_accept): - self.assertEqual(dataframe_metadata.query_column(2), { - 'structural_type': float, - 'name': 'start', - 'semantic_types': ( - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Boundary', - 'https://metadata.datadrivendiscovery.org/types/IntervalStart', - ), - }) - self.assertEqual(dataframe_metadata.query_column(3), { - 'structural_type': float, - 'name': 'end', - 'semantic_types': ( - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Boundary', - 'https://metadata.datadrivendiscovery.org/types/IntervalEnd', - ), - }) - - if is_can_accept: - self.assertEqual(dataframe_metadata.query_column(5), { - 'structural_type': container.ndarray, - 'semantic_types': ( - 'http://schema.org/AudioObject', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/UniqueKey', - ), - 'name': 'filename', - }) - self.assertEqual(dataframe_metadata.query((0, 5)), { - 'structural_type': container.ndarray, - 'semantic_types': ( - 'http://schema.org/AudioObject', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/UniqueKey', - ), - 'name': 'filename', - }) - else: - self.assertEqual(dataframe_metadata.query_column(5), { - 'structural_type': container.ndarray, - 'semantic_types': ( - 'http://schema.org/AudioObject', - 
'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/UniqueKey', - 'https://metadata.datadrivendiscovery.org/types/Table', - ), - 'dimension': { - # The length is set here only because there is only one row. - 'length': 44, - 'name': 'rows', - 'semantic_types': ( - 'https://metadata.datadrivendiscovery.org/types/TabularRow', - ), - }, - 'name': 'filename', - }) - self.assertEqual(dataframe_metadata.query((0, 5)), { - 'structural_type': container.ndarray, - 'semantic_types': ( - 'http://schema.org/AudioObject', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/UniqueKey', - 'https://metadata.datadrivendiscovery.org/types/Table', - ), - 'dimension': { - 'length': 44, - 'name': 'rows', - 'semantic_types': ( - 'https://metadata.datadrivendiscovery.org/types/TabularRow', - ), - 'sampling_rate': 44100, - }, - 'name': 'filename', - }) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_dataframe_flatten.py b/common-primitives/tests/test_dataframe_flatten.py deleted file mode 100644 index 7554132..0000000 --- a/common-primitives/tests/test_dataframe_flatten.py +++ /dev/null @@ -1,132 +0,0 @@ -import unittest -import os - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, csv_reader, dataframe_flatten - - -class DataFrameFlattenPrimitiveTestCase(unittest.TestCase): - - COLUMN_METADATA = { - 'time': { - 'structural_type': str, - 'name': 'time', - 'semantic_types': ( - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Time' - ), - }, - 'value': { - 'structural_type': str, - 'name': 'value', - 'semantic_types': ( - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute' - ), - } - } - - def test_replace(self) -> None: - tables = self._load_data() - flat_hyperparams_class = dataframe_flatten.DataFrameFlattenPrimitive.metadata.get_hyperparams() - flat_primitive = dataframe_flatten.DataFrameFlattenPrimitive(hyperparams=flat_hyperparams_class.defaults()) - flat_result = flat_primitive.produce(inputs=tables).value - - self.assertEqual(flat_result.shape, (830, 3)) - - metadata = flat_result.metadata - self._check_filename_metadata(metadata, 0) - self.assertEqual(metadata.query_column(1), self.COLUMN_METADATA['time']) - self.assertEqual(metadata.query_column(2), self.COLUMN_METADATA['value']) - - def test_new(self) -> None: - tables = self._load_data() - - flat_hyperparams_class = dataframe_flatten.DataFrameFlattenPrimitive.metadata.get_hyperparams() - hp = flat_hyperparams_class.defaults().replace({ - 'return_result': 'new', - 'add_index_columns': False - }) - flat_primitive = dataframe_flatten.DataFrameFlattenPrimitive(hyperparams=hp) - flat_result = flat_primitive.produce(inputs=tables).value - - self.assertEqual(flat_result.shape, (830, 2)) - metadata = flat_result.metadata - self.assertEqual(metadata.query_column(0), self.COLUMN_METADATA['time']) - self.assertEqual(metadata.query_column(1), self.COLUMN_METADATA['value']) - - def test_add_index_columns(self) -> None: - tables = self._load_data() - - flat_hyperparams_class = dataframe_flatten.DataFrameFlattenPrimitive.metadata.get_hyperparams() - hp = flat_hyperparams_class.defaults().replace({ - 'return_result': 'new', - 'add_index_columns': True - }) - flat_primitive = dataframe_flatten.DataFrameFlattenPrimitive(hyperparams=hp) - flat_result = 
flat_primitive.produce(inputs=tables).value - - self.assertEqual(flat_result.shape, (830, 3)) - metadata = flat_result.metadata - self._check_filename_metadata(metadata, 0) - self.assertEqual(metadata.query_column(1), self.COLUMN_METADATA['time']) - self.assertEqual(metadata.query_column(2), self.COLUMN_METADATA['value']) - - def test_use_columns(self) -> None: - tables = self._load_data() - - flat_hyperparams_class = dataframe_flatten.DataFrameFlattenPrimitive.metadata.get_hyperparams() - hp = flat_hyperparams_class.defaults().replace({'use_columns': [1]}) - - flat_primitive = dataframe_flatten.DataFrameFlattenPrimitive(hyperparams=hp) - flat_result = flat_primitive.produce(inputs=tables).value - - self.assertEqual(flat_result.shape, (830, 3)) - - metadata = flat_result.metadata - self._check_filename_metadata(metadata, 0) - self.assertEqual(metadata.query_column(1), self.COLUMN_METADATA['time']) - self.assertEqual(metadata.query_column(2), self.COLUMN_METADATA['value']) - - def test_exclude_columns(self) -> None: - tables = self._load_data() - - flat_hyperparams_class = dataframe_flatten.DataFrameFlattenPrimitive.metadata.get_hyperparams() - hp = flat_hyperparams_class.defaults().replace({'exclude_columns': [0]}) - - flat_primitive = dataframe_flatten.DataFrameFlattenPrimitive(hyperparams=hp) - flat_result = flat_primitive.produce(inputs=tables).value - - self.assertEqual(flat_result.shape, (830, 3)) - - metadata = flat_result.metadata - self._check_filename_metadata(metadata, 0) - self.assertEqual(metadata.query_column(1), self.COLUMN_METADATA['time']) - self.assertEqual(metadata.query_column(2), self.COLUMN_METADATA['value']) - - def _load_data(self) -> container.DataFrame: - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_2', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults().replace({'dataframe_resource': '0'})) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - csv_hyperparams_class = csv_reader.CSVReaderPrimitive.metadata.get_hyperparams() - csv_primitive = csv_reader.CSVReaderPrimitive(hyperparams=csv_hyperparams_class.defaults().replace({'return_result': 'append'})) - return csv_primitive.produce(inputs=dataframe).value - - def _check_filename_metadata(self, metadata: metadata_base.Metadata, col_num: int) -> None: - self.assertEqual(metadata.query_column(col_num)['name'], 'filename') - self.assertEqual(metadata.query_column(col_num)['structural_type'], str) - self.assertEqual(metadata.query_column(col_num)['semantic_types'], ( - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - 'https://metadata.datadrivendiscovery.org/types/FileName', - 'https://metadata.datadrivendiscovery.org/types/Timeseries')) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_dataframe_image_reader.py b/common-primitives/tests/test_dataframe_image_reader.py deleted file mode 100644 index 7368997..0000000 --- a/common-primitives/tests/test_dataframe_image_reader.py +++ /dev/null @@ -1,46 +0,0 @@ -import unittest -import os - -from d3m import container - -from common_primitives import dataset_to_dataframe, dataframe_image_reader - - -class
DataFrameImageReaderPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'image_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults().replace({'dataframe_resource': '0'})) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - image_hyperparams_class = dataframe_image_reader.DataFrameImageReaderPrimitive.metadata.get_hyperparams() - image_primitive = dataframe_image_reader.DataFrameImageReaderPrimitive(hyperparams=image_hyperparams_class.defaults().replace({'return_result': 'replace'})) - images = image_primitive.produce(inputs=dataframe).value - - self.assertEqual(images.shape, (5, 1)) - self.assertEqual(images.iloc[0, 0].shape, (225, 150, 3)) - self.assertEqual(images.iloc[1, 0].shape, (32, 32, 3)) - self.assertEqual(images.iloc[2, 0].shape, (32, 32, 3)) - self.assertEqual(images.iloc[3, 0].shape, (28, 28, 1)) - self.assertEqual(images.iloc[4, 0].shape, (28, 28, 1)) - - self._test_metadata(images.metadata) - - self.assertEqual(images.metadata.query((0, 0))['image_reader_metadata'], { - 'jfif': 257, - 'jfif_version': (1, 1), - 'dpi': (96, 96), - 'jfif_unit': 1, - 'jfif_density': (96, 96), - }) - - def _test_metadata(self, metadata): - self.assertEqual(metadata.query_column(0)['structural_type'], container.ndarray) - self.assertEqual(metadata.query_column(0)['semantic_types'], ('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'http://schema.org/ImageObject')) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_dataframe_to_list.py b/common-primitives/tests/test_dataframe_to_list.py deleted file mode 100644 index 512396c..0000000 --- a/common-primitives/tests/test_dataframe_to_list.py +++ /dev/null @@ -1,41 +0,0 @@ -import unittest - -from d3m import container - -from common_primitives import dataframe_to_list, dataset_to_dataframe - -import utils as test_utils - - -class DataFrameToListPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - # load the iris dataset - dataset = test_utils.load_iris_metadata() - - # convert the dataset into a dataframe - dataset_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataset_hyperparams_class.defaults()) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - # convert the dataframe into a list - list_hyperparams_class = dataframe_to_list.DataFrameToListPrimitive.metadata.get_hyperparams() - list_primitive = dataframe_to_list.DataFrameToListPrimitive(hyperparams=list_hyperparams_class.defaults()) - list_value = list_primitive.produce(inputs=dataframe).value - - self.assertIsInstance(list_value, container.List) - - # verify dimensions - self.assertEqual(len(list_value), 150) - self.assertEqual(len(list_value[0]), 6) - - # verify data type is unchanged - for row in list_value: - for val in row: - self.assertIsInstance(val, str) - - # validate metadata - test_utils.test_iris_metadata(self, list_value.metadata, 'd3m.container.list.List', 'd3m.container.list.List') - - -if __name__ == '__main__': - unittest.main() diff 
--git a/common-primitives/tests/test_dataframe_to_ndarray.py b/common-primitives/tests/test_dataframe_to_ndarray.py deleted file mode 100644 index 6e79645..0000000 --- a/common-primitives/tests/test_dataframe_to_ndarray.py +++ /dev/null @@ -1,40 +0,0 @@ -import unittest - -from common_primitives import dataframe_to_ndarray, dataset_to_dataframe -from d3m import container - -import utils as test_utils - - -class DataFrameToNDArrayPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - # load the iris dataset - dataset = test_utils.load_iris_metadata() - - # convert the dataset into a dataframe - dataset_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataset_hyperparams_class.defaults()) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - # convert the dataframe into a numpy array - numpy_hyperparams_class = dataframe_to_ndarray.DataFrameToNDArrayPrimitive.metadata.get_hyperparams() - numpy_primitive = dataframe_to_ndarray.DataFrameToNDArrayPrimitive(hyperparams=numpy_hyperparams_class.defaults()) - numpy_array = numpy_primitive.produce(inputs=dataframe).value - - self.assertIsInstance(numpy_array, container.ndarray) - - # verify dimensions - self.assertEqual(len(numpy_array), 150) - self.assertEqual(len(numpy_array[0]), 6) - - # verify data type is unchanged - for row in numpy_array: - for val in row: - self.assertIsInstance(val, str) - - # validate metadata - test_utils.test_iris_metadata(self, numpy_array.metadata, 'd3m.container.numpy.ndarray') - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_dataframe_utils.py b/common-primitives/tests/test_dataframe_utils.py deleted file mode 100644 index 9b2b7d7..0000000 --- a/common-primitives/tests/test_dataframe_utils.py +++ /dev/null @@ -1,27 +0,0 @@ -import unittest -import os - -from common_primitives import dataframe_utils -from d3m import container -from d3m.base import utils as base_utils - -import utils as test_utils - - -class DataFrameUtilsTestCase(unittest.TestCase): - def test_inclusive(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - to_keep_indices = [1, 2, 5] - - output = dataframe_utils.select_rows(resource, to_keep_indices) - self.assertEqual(len(output), 3) - self.assertEqual(len(output.iloc[0]), 5) - self.assertEqual(output.iloc[1, 0], '3') - self.assertEqual(output.iloc[2, 0], '6') - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_dataset_map.py b/common-primitives/tests/test_dataset_map.py deleted file mode 100644 index a789d4d..0000000 --- a/common-primitives/tests/test_dataset_map.py +++ /dev/null @@ -1,73 +0,0 @@ -import unittest -import os -import pickle -import sys - -from d3m import container, index, utils as d3m_utils - -TEST_PRIMITIVES_DIR = os.path.join(os.path.dirname(__file__), 'data', 'primitives') -sys.path.insert(0, TEST_PRIMITIVES_DIR) - -from test_primitives.null import NullTransformerPrimitive, NullUnsupervisedLearnerPrimitive - -# To hide any logging or stdout output. 
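- # (The registrations below are presumably needed so the d3m index can resolve these test primitives by their d3m.primitives.* paths later, for example when the dataset_map primitive configured with one of them is pickled at the end of the test.)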
-with d3m_utils.silence(): - index.register_primitive('d3m.primitives.operator.null.TransformerTest', NullTransformerPrimitive) - index.register_primitive('d3m.primitives.operator.null.UnsupervisedLearnerTest', NullUnsupervisedLearnerPrimitive) - -from common_primitives import dataset_to_dataframe, denormalize, dataset_map, column_parser - -import utils as test_utils - - -class DatasetMapTestCase(unittest.TestCase): - def test_basic(self): - self.maxDiff = None - - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # First we try denormalizing and column parsing. - hyperparams = denormalize.DenormalizePrimitive.metadata.get_hyperparams() - primitive = denormalize.DenormalizePrimitive(hyperparams=hyperparams.defaults()) - dataset_1 = primitive.produce(inputs=dataset).value - - hyperparams = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams.defaults()) - dataframe_1 = primitive.produce(inputs=dataset_1).value - - hyperparams = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams.defaults().replace({'return_result': 'replace'})) - dataframe_1 = primitive.produce(inputs=dataframe_1).value - - # Second we try first column parsing and then denormalizing. - hyperparams = dataset_map.DataFrameDatasetMapPrimitive.metadata.get_hyperparams() - primitive = dataset_map.DataFrameDatasetMapPrimitive( - # We have to make an instance of the primitive ourselves. - hyperparams=hyperparams.defaults().replace({ - 'primitive': column_parser.ColumnParserPrimitive( - hyperparams=column_parser.ColumnParserPrimitive.metadata.get_hyperparams().defaults(), - ), - 'resources': 'all', - }), - - ) - dataset_2 = primitive.produce(inputs=dataset).value - - hyperparams = denormalize.DenormalizePrimitive.metadata.get_hyperparams() - primitive = denormalize.DenormalizePrimitive(hyperparams=hyperparams.defaults()) - dataset_2 = primitive.produce(inputs=dataset_2).value - - hyperparams = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams.defaults()) - dataframe_2 = primitive.produce(inputs=dataset_2).value - - self.assertEqual(test_utils.convert_through_json(dataframe_1), test_utils.convert_through_json(dataframe_2)) - self.assertEqual(dataframe_1.metadata.to_internal_json_structure(), dataframe_2.metadata.to_internal_json_structure()) - - pickle.dumps(primitive) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_dataset_sample.py b/common-primitives/tests/test_dataset_sample.py deleted file mode 100644 index 57da93a..0000000 --- a/common-primitives/tests/test_dataset_sample.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -import pickle -import unittest -import pandas as pd - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_sample - - -class DatasetSamplePrimitiveTestCase(unittest.TestCase): - def test_produce(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - - dataset = 
container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_sample.DatasetSamplePrimitive.metadata.get_hyperparams() - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - - sample_sizes = [0.1, 0.5, 0.9, 4, 22, 40] - dataset_sizes = [4, 22, 40, 4, 22, 40] - for s, d in zip(sample_sizes, dataset_sizes): - primitive = dataset_sample.DatasetSamplePrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'sample_size': s, - })) - result = primitive.produce(inputs=dataset).value - self.assertEqual(len(result['learningData'].iloc[:, 0]), d, s) - - def test_empty_test_set(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # set target columns to '' to imitate test dataset - dataset['learningData']['species'] = '' - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - - hyperparams_class = dataset_sample.DatasetSamplePrimitive.metadata.get_hyperparams() - - # check that sampling is skipped and all rows are kept - sample_sizes = [0.1, 0.5, 0.9] - for s in sample_sizes: - primitive = dataset_sample.DatasetSamplePrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'sample_size': s, - })) - result = primitive.produce(inputs=dataset).value - self.assertEqual(len(result['learningData'].iloc[:, 0]), 150, s) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_dataset_to_dataframe.py b/common-primitives/tests/test_dataset_to_dataframe.py deleted file mode 100644 index a7718be..0000000 --- a/common-primitives/tests/test_dataset_to_dataframe.py +++ /dev/null @@ -1,93 +0,0 @@ -import os -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe - -import utils as test_utils - - -class DatasetToDataFramePrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - self.assertIsInstance(dataframe, container.DataFrame) - - for _, row in dataframe.iterrows(): - for cell in row: - # Nothing should be parsed from a string. 
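- # (DatasetToDataFramePrimitive does no value conversion, so every cell should still be a str; parsing values into their real types is left to ColumnParserPrimitive, which other tests exercise.)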
- self.assertIsInstance(cell, str) - - self.assertEqual(len(dataframe), 150) - self.assertEqual(len(dataframe.iloc[0]), 6) - - self._test_metadata(dataframe.metadata) - - def _test_metadata(self, metadata): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 6, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - for i in range(1, 5): - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, i))), { - 'name': ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'][i - 1], - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }, i) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 5))), { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_datetime_field_compose.py b/common-primitives/tests/test_datetime_field_compose.py deleted file mode 100644 index ac93823..0000000 --- a/common-primitives/tests/test_datetime_field_compose.py +++ /dev/null @@ -1,67 +0,0 @@ -import math -import os.path -import unittest - -from datetime import datetime -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, datetime_field_compose - -import utils as test_utils - - -class DatetimeFieldComposePrimitiveTestCase(unittest.TestCase): - def test_compose_two_fields(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_3', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - compose_hyperparams_class = datetime_field_compose.DatetimeFieldComposePrimitive.metadata.get_hyperparams() - hp = compose_hyperparams_class({ - 'columns': [2,3], - 'join_char': '-', - 'output_name': 'timestamp' - }) - compose_primitive = datetime_field_compose.DatetimeFieldComposePrimitive(hyperparams=hp) - new_dataframe = compose_primitive.produce(inputs=resource).value - - self.assertEqual(new_dataframe.shape, (40, 6)) - self.assertEqual(datetime(2013, 11, 1), new_dataframe['timestamp'][0]) - - col_meta = new_dataframe.metadata.query((metadata_base.ALL_ELEMENTS, 5)) - self.assertEqual(col_meta['name'], 'timestamp') - 
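- # the composed column should also be tagged as a time field: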
self.assertTrue('https://metadata.datadrivendiscovery.org/types/Time' in col_meta['semantic_types']) - - def test_bad_join_char(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_3', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - compose_hyperparams_class = datetime_field_compose.DatetimeFieldComposePrimitive.metadata.get_hyperparams() - hp = compose_hyperparams_class({ - 'columns': [2,3], - 'join_char': 'cc', - 'output_name': 'timestamp' - }) - compose_primitive = datetime_field_compose.DatetimeFieldComposePrimitive(hyperparams=hp) - with self.assertRaises(ValueError): - compose_primitive.produce(inputs=resource) - - def test_bad_columns(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_3', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - compose_hyperparams_class = datetime_field_compose.DatetimeFieldComposePrimitive.metadata.get_hyperparams() - hp = compose_hyperparams_class({ - 'columns': [1,2], - 'join_char': '-', - 'output_name': 'timestamp' - }) - compose_primitive = datetime_field_compose.DatetimeFieldComposePrimitive(hyperparams=hp) - with self.assertRaises(ValueError): - compose_primitive.produce(inputs=resource) - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_datetime_range_filter.py b/common-primitives/tests/test_datetime_range_filter.py deleted file mode 100644 index d047e92..0000000 --- a/common-primitives/tests/test_datetime_range_filter.py +++ /dev/null @@ -1,149 +0,0 @@ -import unittest -import os - -from datetime import datetime -from dateutil import parser -from common_primitives import datetime_range_filter -from d3m import container - -import utils as test_utils - - -class DatetimeRangeFilterPrimitiveTestCase(unittest.TestCase): - def test_inclusive_strict(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = datetime_range_filter.DatetimeRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class.defaults().replace({ - 'column': 3, - 'min': datetime(2013, 11, 8), - 'max': datetime(2013, 12, 3), - 'strict': True, - 'inclusive': True - }) - filter_primitive = datetime_range_filter.DatetimeRangeFilterPrimitive(hyperparams=hp) - new_dataframe = filter_primitive.produce(inputs=resource).value - - self.assertGreater(new_dataframe['Date'].apply(parser.parse).min(), datetime(2013, 11, 8)) - self.assertLess(new_dataframe['Date'].apply(parser.parse).max(), datetime(2013, 12, 3)) - self.assertEqual(15, len(new_dataframe)) - - def test_inclusive_permissive(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = 
datetime_range_filter.DatetimeRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class.defaults().replace({ - 'column': 3, - 'min': datetime(2013, 11, 8), - 'max': datetime(2013, 12, 3), - 'strict': False, - 'inclusive': True - }) - filter_primitive = datetime_range_filter.DatetimeRangeFilterPrimitive(hyperparams=hp) - new_dataframe = filter_primitive.produce(inputs=resource).value - - self.assertGreaterEqual(new_dataframe['Date'].apply(parser.parse).min(), datetime(2013, 11, 8)) - self.assertLessEqual(new_dataframe['Date'].apply(parser.parse).max(), datetime(2013, 12, 3)) - self.assertEqual(17, len(new_dataframe)) - - def test_exclusive_strict(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = datetime_range_filter \ - .DatetimeRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class.defaults().replace({ - 'column': 3, - 'min': datetime(2013, 11, 8), - 'max': datetime(2013, 12, 3), - 'strict': True, - 'inclusive': False - }) - filter_primitive = datetime_range_filter.DatetimeRangeFilterPrimitive(hyperparams=hp) - new_dataframe = filter_primitive.produce(inputs=resource).value - - self.assertEqual( - len(new_dataframe.loc[ - (new_dataframe['Date'].apply(parser.parse) >= datetime(2013, 11, 8)) & - (new_dataframe['Date'].apply(parser.parse) <= datetime(2013, 12, 3))]), 0) - self.assertEqual(23, len(new_dataframe)) - - def test_exclusive_permissive(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = datetime_range_filter \ - .DatetimeRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class.defaults().replace({ - 'column': 3, - 'min': datetime(2013, 11, 8), - 'max': datetime(2013, 12, 3), - 'strict': False, - 'inclusive': False - }) - filter_primitive = datetime_range_filter.DatetimeRangeFilterPrimitive(hyperparams=hp) - new_dataframe = filter_primitive.produce(inputs=resource).value - - self.assertEqual( - len(new_dataframe.loc[ - (new_dataframe['Date'].apply(parser.parse) > datetime(2013, 11, 8)) & - (new_dataframe['Date'].apply(parser.parse) < datetime(2013, 12, 3))]), 0) - self.assertEqual(25, len(new_dataframe)) - - def test_row_metadata_removal(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # add metadata for rows 0 and 5 - dataset.metadata = dataset.metadata.update(('learningData', 0), {'a': 0}) - dataset.metadata = dataset.metadata.update(('learningData', 5), {'b': 1}) - - resource = test_utils.get_dataframe(dataset) - - # apply a filter that removes rows 1 through 4, so rows 0 and 5 remain - filter_hyperparams_class = datetime_range_filter.DatetimeRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class.defaults().replace({ - 'column': 3, - 'min': datetime(2013, 11, 4), - 'max': datetime(2013, 11, 7), - 'strict': True, - 'inclusive': False - }) - 
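- # Exclusive filtering ('inclusive': False) drops the rows inside the range, and 'strict': True drops the boundary dates as well; the counts asserted above fit this reading (of 40 rows, inclusive filtering keeps 15 strict / 17 permissive, exclusive keeps 23 strict / 25 permissive).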
filter_primitive = datetime_range_filter.DatetimeRangeFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - # verify that the length is correct - self.assertEqual(len(new_df), new_df.metadata.query(())['dimension']['length']) - - # verify that the rows were re-indexed in the metadata - self.assertEqual(new_df.metadata.query((0,))['a'], 0) - self.assertEqual(new_df.metadata.query((1,))['b'], 1) - self.assertFalse('b' in new_df.metadata.query((5,))) - - def test_bad_type_handling(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = datetime_range_filter \ - .DatetimeRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class.defaults().replace({ - 'column': 1, - 'min': datetime(2013, 11, 1), - 'max': datetime(2013, 11, 4), - 'strict': False, - 'inclusive': False, - }) - filter_primitive = datetime_range_filter.DatetimeRangeFilterPrimitive(hyperparams=hp) - with self.assertRaises(ValueError): - filter_primitive.produce(inputs=resource) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_denormalize.py b/common-primitives/tests/test_denormalize.py deleted file mode 100644 index 0737fed..0000000 --- a/common-primitives/tests/test_denormalize.py +++ /dev/null @@ -1,469 +0,0 @@ -import os -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import denormalize - -import utils as test_utils - - -class DenormalizePrimitiveTestCase(unittest.TestCase): - def test_discard(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - dataset_metadata_before = dataset.metadata.to_internal_json_structure() - - hyperparams_class = denormalize.DenormalizePrimitive.metadata.get_hyperparams() - - primitive = denormalize.DenormalizePrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'recursive': False, - 'discard_not_joined_tabular_resources': True, - })) - - denormalized_dataset = primitive.produce(inputs=dataset).value - - self.assertIsInstance(denormalized_dataset, container.Dataset) - - self.assertEqual(len(denormalized_dataset), 1) - - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 2]), {'AAA name', 'BBB name', 'CCC name'}) - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 3]), {'1', '2', ''}) - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 4]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'}) - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 5]), {'1990', '2000', '2010'}) - - self._test_discard_metadata(denormalized_dataset.metadata, dataset_doc_path) - - self.assertEqual(dataset.metadata.to_internal_json_structure(), dataset_metadata_before) - - def _test_discard_metadata(self, metadata, dataset_doc_path): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 
'd3m.container.dataset.Dataset', - 'id': 'database_dataset_1', - 'version': '4.0.0', - 'name': 'A dataset simulating a database dump', - 'location_uris': [ - 'file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path), - ], - 'dimension': { - 'name': 'resources', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/DatasetResource', - ], - 'length': 1, - }, - 'digest': '68c435c6ba9a1c419c79507275c0d5710786dfe481e48f35591d87a7dbf5bb1a', - 'description': 'A synthetic dataset trying to be similar to a database dump, with tables with different relations between them.', - 'source': { - 'license': 'CC', - 'redacted': False, - }, - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData',))), { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/TabularRow', - ], - 'length': 45, - }, - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 7, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 3))), { - 'name': 'author', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'foreign_key': { - 'type': 'COLUMN', - 'resource_id': 'authors', - 'column_index': 0, - }, - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 1))), { - 'name': 'code', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 2))), { - 'name': 'name', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Text', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 4))), { - 'name': 'key', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 5))), { - 'name': 'year', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/DateTime', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 6))), { - 'name': 'value', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Float', - 
'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - def test_recursive(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - dataset_metadata_before = dataset.metadata.to_internal_json_structure() - - hyperparams_class = denormalize.DenormalizePrimitive.metadata.get_hyperparams() - - primitive = denormalize.DenormalizePrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'recursive': True, - 'discard_not_joined_tabular_resources': False, - })) - - denormalized_dataset = primitive.produce(inputs=dataset).value - - self.assertIsInstance(denormalized_dataset, container.Dataset) - - self.assertEqual(len(denormalized_dataset), 4) - - self.assertEqual(denormalized_dataset['values'].shape[0], 64) - self.assertEqual(denormalized_dataset['learningData'].shape[1], 8) - - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 2]), {'AAA name', 'BBB name', 'CCC name'}) - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 3]), {'1', '2', ''}) - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 4]), {'1 name', '2 name', ''}) - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 5]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'}) - self.assertEqual(set(denormalized_dataset['learningData'].iloc[:, 6]), {'1990', '2000', '2010'}) - - self._test_recursive_metadata(denormalized_dataset.metadata, dataset_doc_path) - - self.assertEqual(dataset.metadata.to_internal_json_structure(), dataset_metadata_before) - - def _test_recursive_metadata(self, metadata, dataset_doc_path): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.dataset.Dataset', - 'id': 'database_dataset_1', - 'version': '4.0.0', - 'name': 'A dataset simulating a database dump', - 'location_uris': [ - 'file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path), - ], - 'dimension': { - 'name': 'resources', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/DatasetResource', - ], - 'length': 4, - }, - 'digest': '68c435c6ba9a1c419c79507275c0d5710786dfe481e48f35591d87a7dbf5bb1a', - 'description': 'A synthetic dataset trying to be similar to a database dump, with tables with different relations between them.', - 'source': { - 'license': 'CC', - 'redacted': False, - }, - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData',))), { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/TabularRow', - ], - 'length': 45, - }, - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 8, - } - }) - - 
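- # Per-column checks: recursive denormalization joins the referenced tables' columns into learningData, which is why there are 8 columns here instead of the 7 in the discard case above.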
self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 3))), { - 'name': 'id', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 1))), { - 'name': 'code', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - for i in [2, 4]: - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, i))), { - 'name': ['name', None, 'name'][i - 2], - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Text', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }, i) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 5))), { - 'name': 'key', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 6))), { - 'name': 'year', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/DateTime', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 7))), { - 'name': 'value', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - def test_row_order(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'image_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - dataset_metadata_before = dataset.metadata.to_internal_json_structure() - - hyperparams_class = denormalize.DenormalizePrimitive.metadata.get_hyperparams() - - primitive = denormalize.DenormalizePrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'recursive': True, - 'discard_not_joined_tabular_resources': False, - })) - - denormalized_dataset = primitive.produce(inputs=dataset).value - - self.assertIsInstance(denormalized_dataset, container.Dataset) - - self.assertEqual(len(denormalized_dataset), 1) - - self.assertEqual(denormalized_dataset['learningData'].shape, (5, 3)) - - self.assertEqual(denormalized_dataset['learningData'].values.tolist(), [ - ['0', 'mnist_0_2.png', 'mnist'], - ['1', 'mnist_1_1.png', 'mnist'], - ['2', '001_HandPhoto_left_01.jpg', 'handgeometry'], - ['3', 'cifar10_bird_1.png', 'cifar'], - ['4', 'cifar10_bird_2.png', 'cifar'], - ]) - - self._test_row_order_metadata(denormalized_dataset.metadata, dataset_doc_path) - - self.assertEqual(dataset.metadata.to_internal_json_structure(), 
dataset_metadata_before) - - def _test_row_order_metadata(self, metadata, dataset_doc_path): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.dataset.Dataset', - 'id': 'image_dataset_1', - 'version': '4.0.0', - 'name': 'Image dataset to be used for tests', - 'location_uris': [ - 'file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path), - ], - 'dimension': { - 'name': 'resources', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/DatasetResource', - ], - 'length': 1, - }, - 'digest': '9b5553ce5ad84dfcefd379814dc6b11ef60a049479e3e91aa1251f7a5ef7409e', - 'description': 'There are a total of 5 image files, one is a left hand from the handgeometry dataset, two birds from cifar10 dataset and 2 figures from mnist dataset.', - 'source': { - 'license': 'Creative Commons Attribution-NonCommercial 4.0', - 'redacted': False, - }, - 'approximate_stored_size': 24000, - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData',))), { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/TabularRow', - ], - 'length': 5, - }, - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 3, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 1))), { - 'name': 'filename', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/FileName', - 'http://schema.org/ImageObject', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/UniqueKey', - ], - 'location_base_uris': [ - 'file://{dataset_base_path}/media/'.format(dataset_base_path=os.path.dirname(dataset_doc_path)), - ], - 'media_types': [ - 'image/jpeg', - 'image/png', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 2))), { - 'name': 'class', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', 0, 1))), { - 'name': 'filename', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/FileName', - 'http://schema.org/ImageObject', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/UniqueKey', - ], - 'location_base_uris': [ - 
'file://{dataset_base_path}/media/'.format(dataset_base_path=os.path.dirname(dataset_doc_path)), - ], - 'media_types': [ - 'image/png', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query(('learningData', 2, 1))), { - 'name': 'filename', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/FileName', - 'http://schema.org/ImageObject', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/UniqueKey', - ], - 'location_base_uris': [ - 'file://{dataset_base_path}/media/'.format(dataset_base_path=os.path.dirname(dataset_doc_path)), - ], - 'media_types': [ - 'image/jpeg', - ], - }) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_extract_columns_semantic_types.py b/common-primitives/tests/test_extract_columns_semantic_types.py deleted file mode 100644 index aff2b59..0000000 --- a/common-primitives/tests/test_extract_columns_semantic_types.py +++ /dev/null @@ -1,203 +0,0 @@ -import os -import unittest - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, extract_columns_semantic_types - -import utils as test_utils - - -class ExtractColumnsBySemanticTypePrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - hyperparams_class = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams() - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey')})) - - call_metadata = primitive.produce(inputs=dataframe) - - dataframe = call_metadata.value - - self._test_metadata(dataframe.metadata) - - def _test_metadata(self, metadata): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - } - }) - - 
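- # Only 5 of the original 6 columns should remain: the target column had its Attribute semantic type removed above, so extracting by Attribute + PrimaryKey leaves it out.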
self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 5, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - for i in range(1, 5): - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, i))), { - 'name': ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'][i - 1], - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }, i) - - self.assertTrue(metadata.get_elements((metadata_base.ALL_ELEMENTS,)) in [[0, 1, 2, 3, 4], [metadata_base.ALL_ELEMENTS, 0, 1, 2, 3, 4]]) - - def test_set(self): - dataset_doc_path = os.path.abspath( - os.path.join( - os.path.dirname(__file__), - "data", - "datasets", - "boston_dataset_1", - "datasetDoc.json", - ) - ) - - dataset = container.Dataset.load( - "file://{dataset_doc_path}".format(dataset_doc_path=dataset_doc_path) - ) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type( - ("learningData", metadata_base.ALL_ELEMENTS, 14), - "https://metadata.datadrivendiscovery.org/types/Target", - ) - dataset.metadata = dataset.metadata.add_semantic_type( - ("learningData", metadata_base.ALL_ELEMENTS, 14), - "https://metadata.datadrivendiscovery.org/types/TrueTarget", - ) - dataset.metadata = dataset.metadata.remove_semantic_type( - ("learningData", metadata_base.ALL_ELEMENTS, 14), - "https://metadata.datadrivendiscovery.org/types/Attribute", - ) - - hyperparams_class = ( - dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - ) - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive( - hyperparams=hyperparams_class.defaults() - ) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - hyperparams_class = ( - extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams() - ) - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - { - "semantic_types": ( - "https://metadata.datadrivendiscovery.org/types/Attribute", - "http://schema.org/Integer", - ), - "match_logic": "equal", - } - ) - ) - - call_metadata = primitive.produce(inputs=dataframe) - - dataframe = call_metadata.value - - self._test_equal_metadata(dataframe.metadata) - - def _test_equal_metadata(self, metadata): - self.maxDiff = None - - self.assertEqual( - test_utils.convert_through_json(metadata.query(())), - { - "structural_type": "d3m.container.pandas.DataFrame", - "semantic_types": [ - "https://metadata.datadrivendiscovery.org/types/Table" - ], - "dimension": { - "name": "rows", - "semantic_types": [ - "https://metadata.datadrivendiscovery.org/types/TabularRow" - ], - "length": 506, - }, - "schema": "https://metadata.datadrivendiscovery.org/schemas/v0/container.json", - }, - ) - - # only one column that should match - self.assertEqual( - test_utils.convert_through_json( - metadata.query((metadata_base.ALL_ELEMENTS,)) - ), - { - "dimension": { - "name": "columns", - "semantic_types": [ - 
"https://metadata.datadrivendiscovery.org/types/TabularColumn" - ], - "length": 1, - } - }, - ) - - self.assertEqual( - test_utils.convert_through_json( - metadata.query((metadata_base.ALL_ELEMENTS, 0)) - ), - { - "name": "TAX", - "structural_type": "str", - "semantic_types": [ - "http://schema.org/Integer", - "https://metadata.datadrivendiscovery.org/types/Attribute", - ], - "description": "full-value property-tax rate per $10,000", - }, - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_extract_columns_structural_types.py b/common-primitives/tests/test_extract_columns_structural_types.py deleted file mode 100644 index 2271181..0000000 --- a/common-primitives/tests/test_extract_columns_structural_types.py +++ /dev/null @@ -1,89 +0,0 @@ -import os -import unittest - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, extract_columns_structural_types, column_parser - -import utils as test_utils - - -class ExtractColumnsByStructuralTypesPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataframe) - - dataframe = call_metadata.value - - hyperparams_class = extract_columns_structural_types.ExtractColumnsByStructuralTypesPrimitive.metadata.get_hyperparams() - - primitive = extract_columns_structural_types.ExtractColumnsByStructuralTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'structural_types': ('int',)})) - - call_metadata = primitive.produce(inputs=dataframe) - - dataframe = call_metadata.value - - self._test_metadata(dataframe.metadata) - - def _test_metadata(self, metadata): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 2, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 1))), { - 'name': 'species', - 'structural_type': 'int', - 
'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_fixed_split.py b/common-primitives/tests/test_fixed_split.py deleted file mode 100644 index 7059ada..0000000 --- a/common-primitives/tests/test_fixed_split.py +++ /dev/null @@ -1,148 +0,0 @@ -import os -import pickle -import unittest - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import fixed_split - - -class FixedSplitDatasetSplitPrimitiveTestCase(unittest.TestCase): - def test_produce_train_values(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = fixed_split.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams() - - hyperparams = hyperparams_class.defaults().replace({ - 'primary_index_values': ['9', '11', '13'], - }) - - # We want to make sure "primary_index_values" is encoded just as a list and not - # a pickle because runtime populates this primitive as a list from a split file. - self.assertEqual(hyperparams.values_to_json_structure(), {'primary_index_values': ['9', '11', '13'], 'row_indices': [], 'delete_recursive': False}) - - primitive = fixed_split.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - # To test that pickling works. - pickle.dumps(primitive) - - results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value - - self.assertEqual(len(results), 1) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(results[0]['learningData'].shape[0], 147) - self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i not in [9, 11, 13]]) - - def test_produce_score_values(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. 
- dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = fixed_split.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams() - - hyperparams = hyperparams_class.defaults().replace({ - 'primary_index_values': ['9', '11', '13'], - }) - - # We want to make sure "primary_index_values" is encoded just as a list and not - # a pickle because runtime populates this primitive as a list from a split file. - self.assertEqual(hyperparams.values_to_json_structure(), {'primary_index_values': ['9', '11', '13'], 'row_indices': [], 'delete_recursive': False}) - - primitive = fixed_split.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value - - self.assertEqual(len(results), 1) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(results[0]['learningData'].shape[0], 3) - self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i in [9, 11, 13]]) - - def test_produce_train_indices(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = fixed_split.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams() - - primitive = fixed_split.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'row_indices': [9, 11, 13], - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - # To test that pickling works. - pickle.dumps(primitive) - - results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value - - self.assertEqual(len(results), 1) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(results[0]['learningData'].shape[0], 147) - self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i not in [9, 11, 13]]) - - def test_produce_score_indices(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. 
- dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = fixed_split.FixedSplitDatasetSplitPrimitive.metadata.get_hyperparams() - - primitive = fixed_split.FixedSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'row_indices': [9, 11, 13], - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value - - self.assertEqual(len(results), 1) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(results[0]['learningData'].shape[0], 3) - self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150) if i in [9, 11, 13]]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_grouping_field_compose.py b/common-primitives/tests/test_grouping_field_compose.py deleted file mode 100644 index 5380be8..0000000 --- a/common-primitives/tests/test_grouping_field_compose.py +++ /dev/null @@ -1,56 +0,0 @@ -import math -import os.path -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, grouping_field_compose - -import utils as test_utils - - -class GroupingFieldComposePrimitiveTestCase(unittest.TestCase): - def test_compose_two_suggested_fields(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_3', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - compose_hyperparams_class = grouping_field_compose.GroupingFieldComposePrimitive.metadata.get_hyperparams() - hp = compose_hyperparams_class.defaults().replace({ - 'join_char': '-', - 'output_name': 'grouping' - }) - compose_primitive = grouping_field_compose.GroupingFieldComposePrimitive(hyperparams=hp) - new_dataframe = compose_primitive.produce(inputs=resource).value - - self.assertEqual(new_dataframe.shape, (40, 6)) - self.assertEqual('abbv-2013', new_dataframe['grouping'][0]) - - col_meta = new_dataframe.metadata.query((metadata_base.ALL_ELEMENTS, 5)) - self.assertEqual(col_meta['name'], 'grouping') - self.assertTrue('https://metadata.datadrivendiscovery.org/types/GroupingKey' in col_meta['semantic_types']) - - def test_compose_two_specified_fields(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_3', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - compose_hyperparams_class = grouping_field_compose.GroupingFieldComposePrimitive.metadata.get_hyperparams() - hp = compose_hyperparams_class.defaults().replace({ - 'columns': [1,3], - 'join_char': '-', - 'output_name': 'grouping' - }) - compose_primitive = grouping_field_compose.GroupingFieldComposePrimitive(hyperparams=hp) - 
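- # with 'columns' set explicitly, the grouping key is composed from columns 1 and 3 rather than from the suggested fields used in the previous test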
new_dataframe = compose_primitive.produce(inputs=resource).value - - self.assertEqual(new_dataframe.shape, (40, 6)) - self.assertEqual('abbv-11-01', new_dataframe['grouping'][0]) - - col_meta = new_dataframe.metadata.query((metadata_base.ALL_ELEMENTS, 5)) - self.assertEqual(col_meta['name'], 'grouping') - self.assertTrue('https://metadata.datadrivendiscovery.org/types/GroupingKey' in col_meta['semantic_types']) - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_horizontal_concat.py b/common-primitives/tests/test_horizontal_concat.py deleted file mode 100644 index 0f8e78f..0000000 --- a/common-primitives/tests/test_horizontal_concat.py +++ /dev/null @@ -1,183 +0,0 @@ -import unittest -import os - -import numpy - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, extract_columns_semantic_types, horizontal_concat - - -class HorizontalConcatPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - test_data_inputs = {'col1': [1.0, 2.0, 3.0]} - dataframe_inputs = container.DataFrame(data=test_data_inputs, generate_metadata=True) - - test_data_targets = {'col2': [1, 2 ,3]} - dataframe_targets = container.DataFrame(data=test_data_targets, generate_metadata=True) - - hyperparams_class = horizontal_concat.HorizontalConcatPrimitive.metadata.get_hyperparams() - - primitive = horizontal_concat.HorizontalConcatPrimitive(hyperparams=hyperparams_class.defaults()) - - call_result = primitive.produce(left=dataframe_inputs, right=dataframe_targets) - - dataframe_concat = call_result.value - - self.assertEqual(dataframe_concat.values.tolist(), [[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]) - - self._test_basic_metadata(dataframe_concat.metadata) - - def _test_basic_metadata(self, metadata): - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'], 2) - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS, 0))['name'], 'col1') - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS, 0))['structural_type'], numpy.float64) - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS, 1))['name'], 'col2') - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS, 1))['structural_type'], numpy.int64) - - def _get_iris(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. 
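- # The three calls below mark column 5 (species) as the target and remove its Attribute type.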
- dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - return dataframe - - def _get_iris_columns(self): - dataframe = self._get_iris() - - hyperparams_class = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams() - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/PrimaryKey',)})) - - call_metadata = primitive.produce(inputs=dataframe) - - index = call_metadata.value - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Attribute',)})) - - call_metadata = primitive.produce(inputs=dataframe) - - attributes = call_metadata.value - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/SuggestedTarget',)})) - - call_metadata = primitive.produce(inputs=dataframe) - - targets = call_metadata.value - - return dataframe, index, attributes, targets - - def test_iris(self): - dataframe, index, attributes, targets = self._get_iris_columns() - - hyperparams_class = horizontal_concat.HorizontalConcatPrimitive.metadata.get_hyperparams() - - primitive = horizontal_concat.HorizontalConcatPrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(left=index, right=attributes) - - call_metadata = primitive.produce(left=call_metadata.value, right=targets) - - new_dataframe = call_metadata.value - - self.assertEqual(dataframe.values.tolist(), new_dataframe.values.tolist()) - - self._test_iris_metadata(dataframe.metadata, new_dataframe.metadata) - - def _test_iris_metadata(self, metadata, new_metadata): - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'], new_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']) - - for i in range(new_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']): - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS, i)), new_metadata.query((metadata_base.ALL_ELEMENTS, i)), i) - - def _get_iris_columns_with_index(self): - dataframe = self._get_iris() - - hyperparams_class = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams() - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/PrimaryKey',)})) - - call_metadata = primitive.produce(inputs=dataframe) - - index = call_metadata.value 
- - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'https://metadata.datadrivendiscovery.org/types/Attribute')})) - - call_metadata = primitive.produce(inputs=dataframe) - - attributes = call_metadata.value - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget')})) - - call_metadata = primitive.produce(inputs=dataframe) - - targets = call_metadata.value - - return dataframe, index, attributes, targets - - def test_iris_with_index_removed(self): - dataframe, index, attributes, targets = self._get_iris_columns_with_index() - - hyperparams_class = horizontal_concat.HorizontalConcatPrimitive.metadata.get_hyperparams() - - primitive = horizontal_concat.HorizontalConcatPrimitive(hyperparams=hyperparams_class.defaults().replace({'use_index': False})) - - call_metadata = primitive.produce(left=index, right=attributes) - - call_metadata = primitive.produce(left=call_metadata.value, right=targets) - - new_dataframe = call_metadata.value - - self.assertEqual(dataframe.values.tolist(), new_dataframe.values.tolist()) - - self._test_iris_with_index_removed_metadata(dataframe.metadata, new_dataframe.metadata) - - def _test_iris_with_index_removed_metadata(self, metadata, new_metadata): - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'], new_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']) - - for i in range(new_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']): - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS, i)), new_metadata.query((metadata_base.ALL_ELEMENTS, i)), i) - - def test_iris_with_index_reorder(self): - dataframe, index, attributes, targets = self._get_iris_columns_with_index() - - # Let's make problems. 
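- # Sort the attributes out of their original row order; the concat primitive is expected to realign rows on the d3mIndex primary key, so the result should still equal the source dataframe.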
- attributes = attributes.sort_values(by='sepalLength').reset_index(drop=True) - - hyperparams_class = horizontal_concat.HorizontalConcatPrimitive.metadata.get_hyperparams() - - primitive = horizontal_concat.HorizontalConcatPrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(left=index, right=attributes) - - call_metadata = primitive.produce(left=call_metadata.value, right=targets) - - new_dataframe = call_metadata.value - - self.assertEqual(dataframe.values.tolist(), new_dataframe.values.tolist()) - - self._test_iris_with_index_reorder_metadata(dataframe.metadata, new_dataframe.metadata) - - def _test_iris_with_index_reorder_metadata(self, metadata, new_metadata): - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'], new_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']) - - for i in range(new_metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']): - self.assertEqual(metadata.query((metadata_base.ALL_ELEMENTS, i)), new_metadata.query((metadata_base.ALL_ELEMENTS, i)), i) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_kfold_split.py b/common-primitives/tests/test_kfold_split.py deleted file mode 100644 index 9983a6e..0000000 --- a/common-primitives/tests/test_kfold_split.py +++ /dev/null @@ -1,100 +0,0 @@ -import os -import pickle -import unittest - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import kfold_split - - -class KFoldDatasetSplitPrimitiveTestCase(unittest.TestCase): - def test_produce_train(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = kfold_split.KFoldDatasetSplitPrimitive.metadata.get_hyperparams() - - primitive = kfold_split.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'number_of_folds': 10, - 'shuffle': True, - 'delete_recursive': True, - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - # To test that pickling works. 
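- # A fitted split primitive must be picklable so a runtime can serialize and restore it between pipeline steps.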
- pickle.dumps(primitive) - - results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value - - self.assertEqual(len(results), 2) - - for dataset in results: - self.assertEqual(len(dataset), 4) - - self.assertEqual(results[0]['codes'].shape[0], 3) - self.assertEqual(results[1]['codes'].shape[0], 3) - - self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 40) - self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'}) - - self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 40) - self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ccc', 'ddd', 'eee'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'}) - - def test_produce_score(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = kfold_split.KFoldDatasetSplitPrimitive.metadata.get_hyperparams() - - primitive = kfold_split.KFoldDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'number_of_folds': 10, - 'shuffle': True, - 'delete_recursive': True, - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value - - self.assertEqual(len(results), 2) - - for dataset in results: - self.assertEqual(len(dataset), 4) - - self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 0]), {'5', '11', '28', '31', '38'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ddd', 'eee'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000'}) - - self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'BBB', 'CCC'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 0]), {'12', '26', '29', '32', '39'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'BBB', 'CCC'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'bbb', 'ccc', 'ddd', 'eee'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000', '2010'}) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_kfold_timeseries_split.py 
b/common-primitives/tests/test_kfold_timeseries_split.py deleted file mode 100644 index 885ab2e..0000000 --- a/common-primitives/tests/test_kfold_timeseries_split.py +++ /dev/null @@ -1,223 +0,0 @@ -import os -import pickle -import unittest - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import kfold_split_timeseries - - -class KFoldTimeSeriesSplitPrimitiveTestCase(unittest.TestCase): - def test_produce_train_timeseries_1(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() - - folds = 5 - primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'number_of_folds': folds, - 'number_of_window_folds': 1, - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - # To test that pickling works. - pickle.dumps(primitive) - - results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value - - self.assertEqual(len(results), 2) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 8) - self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-05', '2013-11-06', '2013-11-07', '2013-11-08', '2013-11-11', - '2013-11-12', '2013-11-13', '2013-11-14'}) - - self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 8) - self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-13', '2013-11-14', '2013-11-15', '2013-11-18', '2013-11-19', - '2013-11-20', '2013-11-21', '2013-11-22'}) - - def test_produce_score_timeseries_1(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. 
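- # In this dataset the target is column 4: add Target/TrueTarget and remove Attribute.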
- dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() - - folds = 5 - primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'number_of_folds': folds, - 'number_of_window_folds': 1, - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value - - self.assertEqual(len(results), 2) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 6) - self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-15', '2013-11-18', '2013-11-19', - '2013-11-20', '2013-11-21', '2013-11-22'}) - - self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 6) - self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-25', '2013-11-26', '2013-11-27', - '2013-11-29', '2013-12-02', '2013-12-03'}) - - def test_produce_train(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - # We fake that the dataset is time-series. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Time') - - hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() - - folds = 5 - primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'number_of_folds': folds, - 'number_of_window_folds': 1, - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - # To test that pickling works. 
- pickle.dumps(primitive) - - results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value - - self.assertEqual(len(results), 2) - - for dataset in results: - self.assertEqual(len(dataset), 4) - - self.assertEqual(results[0]['codes'].shape[0], 3) - self.assertEqual(results[1]['codes'].shape[0], 3) - - self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 9) - self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'bbb', 'ccc', 'ddd'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990'}) - - self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 9) - self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'aaa', 'bbb', 'ddd', 'eee'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'1990', '2000'}) - - def test_produce_score(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - # We fake that the dataset is time-series. 
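- # Adding the Time semantic type to column 3 (the year) lets the splitter treat that column as the time axis when ordering folds.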
- dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 3), 'https://metadata.datadrivendiscovery.org/types/Time') - - hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() - - folds = 5 - primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'number_of_folds': folds, - 'number_of_window_folds': 1, - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - results = primitive.produce_score_data(inputs=container.List([0, 1], generate_metadata=True)).value - - self.assertEqual(len(results), 2) - - for dataset in results: - self.assertEqual(len(dataset), 4) - - self.assertEqual(results[0]['codes'].shape[0], 3) - self.assertEqual(results[1]['codes'].shape[0], 3) - - self.assertEqual(set(results[0]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 0]), {'2', '3', '32', '33', '37', '38', '39'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 2]), {'aaa', 'ddd', 'eee'}) - self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'1990', '2000'}) - - self.assertEqual(set(results[1]['codes'].iloc[:, 0]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 0]), {'22', '23', '24', '31', '40', '41', '42'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 1]), {'AAA', 'BBB', 'CCC'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 2]), {'ccc', 'ddd', 'eee'}) - self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2000'}) - - def test_unsorted_datetimes_timeseries_4(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'timeseries_dataset_4', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive.metadata.get_hyperparams() - - folds = 5 - primitive = kfold_split_timeseries.KFoldTimeSeriesSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'number_of_folds': folds, - 'number_of_window_folds': 1, - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - # To test that pickling works. 
- pickle.dumps(primitive) - - results = primitive.produce(inputs=container.List([0, 1], generate_metadata=True)).value - - self.assertEqual(len(results), 2) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(len(results[0]['learningData'].iloc[:, 0]), 8) - self.assertEqual(set(results[0]['learningData'].iloc[:, 3]), {'2013-11-05', '2013-11-06', '2013-11-07', '2013-11-08', '2013-11-11', - '2013-11-12', '2013-11-13', '2013-11-14'}) - - self.assertEqual(len(results[1]['learningData'].iloc[:, 0]), 8) - self.assertEqual(set(results[1]['learningData'].iloc[:, 3]), {'2013-11-13', '2013-11-14', '2013-11-15', '2013-11-18', '2013-11-19', - '2013-11-20', '2013-11-21', '2013-11-22'}) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_lgbm_classifier.py b/common-primitives/tests/test_lgbm_classifier.py deleted file mode 100644 index 90d7d43..0000000 --- a/common-primitives/tests/test_lgbm_classifier.py +++ /dev/null @@ -1,571 +0,0 @@ -import os -import pickle -import unittest - -import numpy as np - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, extract_columns_semantic_types, lgbm_classifier, column_parser - - -def _add_categorical_col(attributes): - rand_str = ['a', 'b', 'c', 'd', 'e'] - attributes = attributes.append_columns(container.DataFrame(data={ - 'mock_cat_col': np.random.choice(rand_str, attributes.shape[0]) - }, generate_metadata=True)) - attributes.metadata = attributes.metadata.add_semantic_type([metadata_base.ALL_ELEMENTS, attributes.shape[-1] - 1], - 'https://metadata.datadrivendiscovery.org/types/CategoricalData') - attributes.metadata = attributes.metadata.add_semantic_type([metadata_base.ALL_ELEMENTS, attributes.shape[-1] - 1], - 'https://metadata.datadrivendiscovery.org/types/Attribute') - return attributes - - -def _get_iris(): - dataset_doc_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = \ - dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - dataframe = primitive.produce(inputs=dataset).value - return dataframe - - -def _get_iris_columns(): - dataframe = _get_iris() - - # We set custom metadata on columns. - for column_index in range(1, 5): - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'attributes'}) - for column_index in range(5, 6): - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'targets'}) - - # We set semantic types like runtime would. - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/Target') - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataframe.metadata = dataframe.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/Attribute') - dataframe = _add_categorical_col(dataframe) - - # Parsing. 
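- # ColumnParserPrimitive casts the string cells into the structural types implied by column metadata (integer index, float measurements, categorical codes).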
- hyperparams_class = \ - column_parser.ColumnParserPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - hyperparams_class = \ - extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - {'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Attribute',)})) - attributes = primitive.produce(inputs=dataframe).value - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - {'semantic_types': ('https://metadata.datadrivendiscovery.org/types/SuggestedTarget',)})) - targets = primitive.produce(inputs=dataframe).value - - return dataframe, attributes, targets - - -class LGBMTestCase(unittest.TestCase): - attributes: container.DataFrame = None - targets: container.DataFrame = None - dataframe: container.DataFrame = None - - @classmethod - def setUpClass(cls) -> None: - cls.dataframe, cls.attributes, cls.targets = _get_iris_columns() - cls.excp_attributes = cls.attributes.copy() - - def test_single_target(self): - self.assertEqual(list(self.targets.columns), ['species']) - - hyperparams_class = \ - lgbm_classifier.LightGBMClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = lgbm_classifier.LightGBMClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=self.attributes, outputs=self.targets) - primitive.fit() - - predictions = primitive.produce(inputs=self.attributes).value - - self.assertEqual(list(predictions.columns), ['species']) - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - self._test_single_target_metadata(predictions.metadata) - - samples = primitive.sample(inputs=self.attributes).value - - self.assertEqual(list(samples[0].columns), ['species']) - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=self.attributes, outputs=self.targets).value - - 
self.assertEqual(list(log_likelihoods.columns), ['species']) - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=self.attributes, outputs=self.targets).value - - self.assertEqual(list(log_likelihood.columns), ['species']) - - self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -6.338635478886032) - self.assertEqual(log_likelihood.metadata.query_column(0)['name'], 'species') - - def test_single_target_continue_fit(self): - hyperparams_class = \ - lgbm_classifier.LightGBMClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = lgbm_classifier.LightGBMClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=self.attributes, outputs=self.targets) - primitive.fit() - # reset the training data to make continue_fit() work. - primitive.set_training_data(inputs=self.attributes, outputs=self.targets) - primitive.continue_fit() - params = primitive.get_params() - self.assertEqual(params['booster'].current_iteration(), - primitive.hyperparams['n_estimators'] + primitive.hyperparams['n_more_estimators']) - predictions = primitive.produce(inputs=self.attributes).value - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - self._test_single_target_metadata(predictions.metadata) - - samples = primitive.sample(inputs=self.attributes).value - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=self.attributes, outputs=self.targets).value - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=self.attributes, outputs=self.targets).value - - self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -3.723258225143776) - self.assertEqual(log_likelihood.metadata.query_column(0)['name'], 'species') - - def _test_single_target_metadata(self, predictions_metadata): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types':
['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 1, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }] - - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), expected_metadata) - - def test_semantic_types(self): - # dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - lgbm_classifier.LightGBMClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = lgbm_classifier.LightGBMClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=self.dataframe, outputs=self.dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=self.dataframe).value - - self.assertEqual(list(predictions.columns), ['species']) - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - samples = primitive.sample(inputs=self.dataframe).value - self.assertEqual(list(samples[0].columns), ['species']) - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=self.dataframe, outputs=self.dataframe).value - self.assertEqual(list(log_likelihoods.columns), ['species']) - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=self.dataframe, outputs=self.dataframe).value - self.assertEqual(list(log_likelihood.columns), ['species']) - - self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -6.338635478886032) - 
self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - feature_importances = primitive.produce_feature_importances().value - self.assertEqual(list(feature_importances), - ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'mock_cat_col']) - self.assertEqual(feature_importances.metadata.query_column(0)['name'], 'sepalLength') - self.assertEqual(feature_importances.metadata.query_column(1)['name'], 'sepalWidth') - self.assertEqual(feature_importances.metadata.query_column(2)['name'], 'petalLength') - self.assertEqual(feature_importances.metadata.query_column(3)['name'], 'petalWidth') - - self.assertEqual(feature_importances.values.tolist(), - [[0.22740524781341107, 0.18513119533527697, 0.3323615160349854, 0.25510204081632654, 0.0]]) - - def test_return_append(self): - hyperparams_class = \ - lgbm_classifier.LightGBMClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = lgbm_classifier.LightGBMClassifierPrimitive(hyperparams=hyperparams_class.defaults()) - - primitive.set_training_data(inputs=self.dataframe, outputs=self.dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=self.dataframe).value - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'sepalLength', - 'sepalWidth', - 'petalLength', - 'petalWidth', - 'species', - 'mock_cat_col', - 'species', - ]) - self.assertEqual(predictions.shape, (150, 8)) - self.assertEqual(predictions.iloc[0, 7], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 7), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 7), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(7)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(7)['custom_metadata'], 'targets') - - self._test_return_append_metadata(predictions.metadata) - - def _test_return_append_metadata(self, predictions_metadata): - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), [{ - 'metadata': {'dimension': {'length': 150, - 'name': 'rows', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/TabularRow']}, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame'}, - 'selector': []}, - {'metadata': {'dimension': {'length': 8, - 'name': 'columns', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/TabularColumn']}}, - 'selector': ['__ALL_ELEMENTS__']}, - {'metadata': {'name': 'd3mIndex', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - 'structural_type': 'int'}, - 'selector': ['__ALL_ELEMENTS__', 0]}, - {'metadata': {'custom_metadata': 'attributes', - 'name': 'sepalLength', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'float'}, - 'selector': ['__ALL_ELEMENTS__', 1]}, - {'metadata': {'custom_metadata': 'attributes', - 'name': 'sepalWidth', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'float'}, - 'selector': ['__ALL_ELEMENTS__', 2]}, - {'metadata': {'custom_metadata': 
'attributes', - 'name': 'petalLength', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'float'}, - 'selector': ['__ALL_ELEMENTS__', 3]}, - {'metadata': {'custom_metadata': 'attributes', - 'name': 'petalWidth', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'float'}, - 'selector': ['__ALL_ELEMENTS__', 4]}, - {'metadata': {'custom_metadata': 'targets', - 'name': 'species', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget'], - 'structural_type': 'str'}, - 'selector': ['__ALL_ELEMENTS__', 5]}, - {'metadata': {'name': 'mock_cat_col', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'int'}, - 'selector': ['__ALL_ELEMENTS__', 6]}, - {'metadata': {'custom_metadata': 'targets', - 'name': 'species', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'structural_type': 'str'}, - 'selector': ['__ALL_ELEMENTS__', 7]}] - ) - - def test_return_new(self): - hyperparams_class = \ - lgbm_classifier.LightGBMClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = lgbm_classifier.LightGBMClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new'})) - - primitive.set_training_data(inputs=self.dataframe, outputs=self.dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=self.dataframe).value - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'species', - ]) - - self.assertEqual(predictions.shape, (150, 2)) - self.assertEqual(predictions.iloc[0, 1], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - self._test_return_new_metadata(predictions.metadata) - - def _test_return_new_metadata(self, predictions_metadata): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 2, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 
'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }] - - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), expected_metadata) - - def test_return_replace(self): - hyperparams_class = \ - lgbm_classifier.LightGBMClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = lgbm_classifier.LightGBMClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace'})) - - primitive.set_training_data(inputs=self.dataframe, outputs=self.dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=self.dataframe).value - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'species', - 'species', - ]) - self.assertEqual(predictions.shape, (150, 3)) - self.assertEqual(predictions.iloc[0, 1], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - self._test_return_replace_metadata(predictions.metadata) - - def test_pickle_unpickle(self): - hyperparams_class = \ - lgbm_classifier.LightGBMClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = lgbm_classifier.LightGBMClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=self.attributes, outputs=self.targets) - primitive.fit() - - before_pickled_prediction = primitive.produce(inputs=self.attributes).value - pickle_object = pickle.dumps(primitive) - primitive = pickle.loads(pickle_object) - after_unpickled_prediction = primitive.produce(inputs=self.attributes).value - self.assertTrue(container.DataFrame.equals(before_pickled_prediction, after_unpickled_prediction)) - - def _test_return_replace_metadata(self, predictions_metadata): - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { 
- 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget'], - 'custom_metadata': 'targets', - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_list_to_dataframe.py b/common-primitives/tests/test_list_to_dataframe.py deleted file mode 100644 index 0860981..0000000 --- a/common-primitives/tests/test_list_to_dataframe.py +++ /dev/null @@ -1,185 +0,0 @@ -import unittest - -import numpy - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import list_to_dataframe - - -class ListToDataFramePrimitiveTestCase(unittest.TestCase): - def test_basic(self): - data = container.List([container.List([1, 2, 3]), container.List([4, 5, 6])], generate_metadata=True) - - list_hyperparams_class = list_to_dataframe.ListToDataFramePrimitive.metadata.get_hyperparams() - list_primitive = list_to_dataframe.ListToDataFramePrimitive(hyperparams=list_hyperparams_class.defaults()) - dataframe = list_primitive.produce(inputs=data).value - - self._test_basic_metadata(dataframe.metadata, 'numpy.int64', True) - - def _test_basic_metadata(self, metadata, structural_type, add_individual_columns): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'dimension': { - 'length': 2, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - 'structural_type': '__NO_VALUE__', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'], - 'metadata': { - 'structural_type': 'int', - }, - }] - - if add_individual_columns: - expected_metadata.extend([{ - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'structural_type': structural_type, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': structural_type, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'structural_type': structural_type, - }, - }]) - - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), expected_metadata) - - def test_just_list(self): - data = container.List([1, 2, 3], generate_metadata=True) - - list_hyperparams_class = list_to_dataframe.ListToDataFramePrimitive.metadata.get_hyperparams() - list_primitive = 
list_to_dataframe.ListToDataFramePrimitive(hyperparams=list_hyperparams_class.defaults()) - dataframe = list_primitive.produce(inputs=data).value - - self._test_just_list_metadata(dataframe.metadata, 'numpy.int64', True) - - def _test_just_list_metadata(self, metadata, structural_type, use_individual_columns): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'dimension': { - 'length': 3, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 1, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - 'structural_type': '__NO_VALUE__', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'], - 'metadata': { - 'structural_type': structural_type, - }, - }] - - if use_individual_columns: - expected_metadata[-1]['selector'] = ['__ALL_ELEMENTS__', 0] - - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), expected_metadata) - - def test_list_ndarray(self): - data = container.List([container.ndarray(numpy.array([1, 2, 3])), container.ndarray(numpy.array([4, 5, 6]))], generate_metadata=True) - - list_hyperparams_class = list_to_dataframe.ListToDataFramePrimitive.metadata.get_hyperparams() - list_primitive = list_to_dataframe.ListToDataFramePrimitive(hyperparams=list_hyperparams_class.defaults()) - dataframe = list_primitive.produce(inputs=data).value - - self._test_list_ndarray_metadata(dataframe.metadata, True) - - def _test_list_ndarray_metadata(self, metadata, add_individual_columns): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'dimension': { - 'length': 2, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - 'structural_type': '__NO_VALUE__', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'], - 'metadata': { - 'structural_type': 'numpy.int64', - }, - }] - - if add_individual_columns: - expected_metadata.extend([{ - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'structural_type': 'numpy.int64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'numpy.int64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'structural_type': 'numpy.int64', - }, - }]) - - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), expected_metadata) - - def test_list_deeper_ndarray(self): - data = container.List([container.ndarray(numpy.array([[1, 2, 3], [11, 12, 13]])), container.ndarray(numpy.array([[4, 5, 6], [14, 15, 16]]))], generate_metadata=True) - - list_hyperparams_class = list_to_dataframe.ListToDataFramePrimitive.metadata.get_hyperparams() - list_primitive = list_to_dataframe.ListToDataFramePrimitive(hyperparams=list_hyperparams_class.defaults()) - - with self.assertRaisesRegex(ValueError, 'Must pass 2-d 
input'): - list_primitive.produce(inputs=data).value - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_list_to_ndarray.py b/common-primitives/tests/test_list_to_ndarray.py deleted file mode 100644 index 07d6d23..0000000 --- a/common-primitives/tests/test_list_to_ndarray.py +++ /dev/null @@ -1,132 +0,0 @@ -import unittest - -import numpy - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import list_to_ndarray - - -class ListToNDArrayPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - data = container.List([container.List([1, 2, 3]), container.List([4, 5, 6])], generate_metadata=True) - - list_hyperparams_class = list_to_ndarray.ListToNDArrayPrimitive.metadata.get_hyperparams() - list_primitive = list_to_ndarray.ListToNDArrayPrimitive(hyperparams=list_hyperparams_class.defaults()) - array = list_primitive.produce(inputs=data).value - - self._test_basic_metadata(array.metadata, 'numpy.int64') - - def _test_basic_metadata(self, metadata, structural_type): - self.maxDiff = None - - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.numpy.ndarray', - 'dimension': { - 'length': 2, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - 'structural_type': '__NO_VALUE__', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'], - 'metadata': { - 'structural_type': structural_type, - }, - }]) - - def test_just_list(self): - data = container.List([1, 2, 3], generate_metadata=True) - - list_hyperparams_class = list_to_ndarray.ListToNDArrayPrimitive.metadata.get_hyperparams() - list_primitive = list_to_ndarray.ListToNDArrayPrimitive(hyperparams=list_hyperparams_class.defaults()) - array = list_primitive.produce(inputs=data).value - - self._test_just_list_metadata(array.metadata, 'numpy.int64') - - def _test_just_list_metadata(self, metadata, structural_type): - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.numpy.ndarray', - 'dimension': { - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'structural_type': structural_type, - }, - }]) - - def test_list_ndarray(self): - data = container.List([container.ndarray(numpy.array([[1, 2, 3], [11, 12, 13]])), container.ndarray(numpy.array([[4, 5, 6], [14, 15, 16]]))], generate_metadata=True) - - list_hyperparams_class = list_to_ndarray.ListToNDArrayPrimitive.metadata.get_hyperparams() - list_primitive = list_to_ndarray.ListToNDArrayPrimitive(hyperparams=list_hyperparams_class.defaults()) - array = list_primitive.produce(inputs=data).value - - self._test_list_ndarray_metadata(array.metadata) - - def _test_list_ndarray_metadata(self, metadata): - self.maxDiff = None - - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 
'd3m.container.numpy.ndarray', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'length': 2, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'semantic_types': '__NO_VALUE__', - 'dimension': { - 'length': 2, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - 'structural_type': '__NO_VALUE__', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 3, - 'semantic_types': '__NO_VALUE__', - 'name': '__NO_VALUE__', - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__', '__ALL_ELEMENTS__'], - 'metadata': { - 'structural_type': 'numpy.int64', - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_ndarray_to_dataframe.py b/common-primitives/tests/test_ndarray_to_dataframe.py deleted file mode 100644 index d2987e2..0000000 --- a/common-primitives/tests/test_ndarray_to_dataframe.py +++ /dev/null @@ -1,99 +0,0 @@ -import unittest - -import numpy - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataframe_to_ndarray, dataset_to_dataframe, ndarray_to_dataframe - -import utils as test_utils - - -class NDArrayToDataFramePrimitiveTestCase(unittest.TestCase): - def test_basic(self): - # TODO: Find a less cumbersome way to get a numpy array loaded with a dataset - # load the iris dataset - dataset = test_utils.load_iris_metadata() - - # convert the dataset into a dataframe - dataset_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataset_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataset_hyperparams_class.defaults()) - dataframe_dataset = dataset_primitive.produce(inputs=dataset).value - - # convert the dataframe into a numpy array - numpy_hyperparams_class = dataframe_to_ndarray.DataFrameToNDArrayPrimitive.metadata.get_hyperparams() - numpy_primitive = dataframe_to_ndarray.DataFrameToNDArrayPrimitive(hyperparams=numpy_hyperparams_class.defaults()) - numpy_array = numpy_primitive.produce(inputs=dataframe_dataset).value - - # convert the numpy array back into a dataframe - dataframe_hyperparams_class = ndarray_to_dataframe.NDArrayToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = ndarray_to_dataframe.NDArrayToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults()) - dataframe = dataframe_primitive.produce(inputs=numpy_array).value - - self.assertIsInstance(dataframe, container.DataFrame) - - # verify dimensions - self.assertEqual(len(dataframe), 150) - self.assertEqual(len(dataframe.iloc[0]), 6) - - # ensure column names added to dataframe - self.assertListEqual(list(dataframe.columns.values), ['d3mIndex', 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'species']) - - # verify data type is unchanged - for row in dataframe: - for cell in row: - self.assertIsInstance(cell, str) - - # validate metadata - test_utils.test_iris_metadata(self, dataframe.metadata, 'd3m.container.pandas.DataFrame') - - def test_vector(self): - data = container.ndarray(numpy.array([1, 2, 3]), generate_metadata=True) - - dataframe_hyperparams_class = ndarray_to_dataframe.NDArrayToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = 
ndarray_to_dataframe.NDArrayToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults()) - dataframe = dataframe_primitive.produce(inputs=data).value - - self._test_vector_metadata(dataframe.metadata, True) - - def _test_vector_metadata(self, metadata, use_individual_columns): - self.maxDiff = None - - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'dimension': { - 'length': 3, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 1, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - 'structural_type': '__NO_VALUE__', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'], - 'metadata': { - 'structural_type': 'numpy.int64', - }, - }] - - if use_individual_columns: - expected_metadata[-1]['selector'] = ['__ALL_ELEMENTS__', 0] - - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), expected_metadata) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_ndarray_to_list.py b/common-primitives/tests/test_ndarray_to_list.py deleted file mode 100644 index b2c6555..0000000 --- a/common-primitives/tests/test_ndarray_to_list.py +++ /dev/null @@ -1,116 +0,0 @@ -import unittest - -import numpy - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataframe_to_ndarray, dataset_to_dataframe, ndarray_to_list - -import utils as test_utils - - -class NDArrayToListPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - # TODO: Find a less cumbersome way to get a numpy array loaded with a dataset - # load the iris dataset - dataset = test_utils.load_iris_metadata() - - # convert the dataset into a dataframe - dataset_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataset_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataset_hyperparams_class.defaults()) - dataframe_dataset = dataset_primitive.produce(inputs=dataset).value - - # convert the dataframe into a numpy array - numpy_hyperparams_class = dataframe_to_ndarray.DataFrameToNDArrayPrimitive.metadata.get_hyperparams() - numpy_primitive = dataframe_to_ndarray.DataFrameToNDArrayPrimitive(hyperparams=numpy_hyperparams_class.defaults()) - numpy_array = numpy_primitive.produce(inputs=dataframe_dataset).value - - list_hyperparams_class = ndarray_to_list.NDArrayToListPrimitive.metadata.get_hyperparams() - list_primitive = ndarray_to_list.NDArrayToListPrimitive(hyperparams=list_hyperparams_class.defaults()) - list_value = list_primitive.produce(inputs=numpy_array).value - - self.assertIsInstance(list_value, container.List) - - # verify dimensions - self.assertEqual(len(list_value), 150) - self.assertEqual(len(list_value[0]), 6) - - # validate metadata - test_utils.test_iris_metadata(self, list_value.metadata, 'd3m.container.list.List', 'd3m.container.numpy.ndarray') - - def test_vector(self): - data = container.ndarray(numpy.array([1, 2, 3]), generate_metadata=True) - - list_hyperparams_class = ndarray_to_list.NDArrayToListPrimitive.metadata.get_hyperparams() - list_primitive = 
ndarray_to_list.NDArrayToListPrimitive(hyperparams=list_hyperparams_class.defaults()) - list_value = list_primitive.produce(inputs=data).value - - self._test_vector_metadata(list_value.metadata) - - def _test_vector_metadata(self, metadata): - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.list.List', - 'dimension': { - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'structural_type': 'numpy.int64', - }, - }]) - - def test_deep_array(self): - data = container.ndarray(numpy.array(range(2 * 3 * 4)).reshape((2, 3, 4)), generate_metadata=True) - - list_hyperparams_class = ndarray_to_list.NDArrayToListPrimitive.metadata.get_hyperparams() - list_primitive = ndarray_to_list.NDArrayToListPrimitive(hyperparams=list_hyperparams_class.defaults()) - list_value = list_primitive.produce(inputs=data).value - - self._test_deep_vector_metadata(list_value.metadata) - - def _test_deep_vector_metadata(self, metadata): - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.list.List', - 'dimension': { - 'length': 2, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'structural_type': 'd3m.container.numpy.ndarray', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 4, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__', '__ALL_ELEMENTS__'], - 'metadata': { - 'structural_type': 'numpy.int64', - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_no_split.py b/common-primitives/tests/test_no_split.py deleted file mode 100644 index f61f476..0000000 --- a/common-primitives/tests/test_no_split.py +++ /dev/null @@ -1,71 +0,0 @@ -import os -import pickle -import unittest - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import no_split - - -class NoSplitDatasetSplitPrimitiveTestCase(unittest.TestCase): - def test_produce_train(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. 
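# A sketch of the target-marking boilerplate that the next three metadata
# calls (and several other tests in this patch) repeat; mark_true_target is
# a hypothetical helper, not part of common_primitives, shown only to make
# the runtime-style setup explicit.
def mark_true_target(dataset, resource_id, column_index):
    from d3m.metadata import base as metadata_base

    # Mark the column as the true target and drop its Attribute type,
    # mirroring what the D3M runtime does before running a pipeline.
    selector = (resource_id, metadata_base.ALL_ELEMENTS, column_index)
    dataset.metadata = dataset.metadata.add_semantic_type(
        selector, 'https://metadata.datadrivendiscovery.org/types/Target')
    dataset.metadata = dataset.metadata.add_semantic_type(
        selector, 'https://metadata.datadrivendiscovery.org/types/TrueTarget')
    dataset.metadata = dataset.metadata.remove_semantic_type(
        selector, 'https://metadata.datadrivendiscovery.org/types/Attribute')
    return dataset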
- dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams() - - primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults()) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - # To test that pickling works. - pickle.dumps(primitive) - - results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value - - self.assertEqual(len(results), 1) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(results[0]['learningData'].shape[0], 150) - self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150)]) - - def test_produce_score(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = no_split.NoSplitDatasetSplitPrimitive.metadata.get_hyperparams() - - primitive = no_split.NoSplitDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults()) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value - - self.assertEqual(len(results), 1) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(results[0]['learningData'].shape[0], 150) - self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [str(i) for i in range(150)]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_normalize_column_references.py b/common-primitives/tests/test_normalize_column_references.py deleted file mode 100644 index 363ecb0..0000000 --- a/common-primitives/tests/test_normalize_column_references.py +++ /dev/null @@ -1,597 +0,0 @@ -import os -import unittest - -from d3m import container, utils - -from common_primitives import normalize_column_references - -import utils as test_utils - - -class NormalizeColumnReferencesPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json') - ) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - metadata_before = dataset.metadata.to_internal_json_structure() - - 
self._test_metadata_before(utils.to_json_structure(dataset.metadata.to_internal_simple_structure()), dataset_doc_path) - - hyperparams_class = normalize_column_references.NormalizeColumnReferencesPrimitive.metadata.get_hyperparams() - - primitive = normalize_column_references.NormalizeColumnReferencesPrimitive( - hyperparams=hyperparams_class.defaults() - ) - - normalized_dataset = primitive.produce(inputs=dataset).value - - self.assertIsInstance(normalized_dataset, container.Dataset) - - self._test_metadata_after(utils.to_json_structure(normalized_dataset.metadata.to_internal_simple_structure()), dataset_doc_path) - - self.assertEqual(metadata_before, dataset.metadata.to_internal_json_structure()) - - def _test_metadata_before(self, metadata, dataset_doc_path): - self.maxDiff = None - - self.assertEqual( - test_utils.convert_through_json(metadata), - [ - { - 'selector': [], - 'metadata': { - 'description': 'A synthetic dataset trying to be similar to a database dump, with tables with different relations between them.', - 'digest': '68c435c6ba9a1c419c79507275c0d5710786dfe481e48f35591d87a7dbf5bb1a', - 'dimension': { - 'length': 4, - 'name': 'resources', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'], - }, - 'id': 'database_dataset_1', - 'location_uris': [ - 'file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path), - ], - 'name': 'A dataset simulating a database dump', - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - 'source': {'license': 'CC', 'redacted': False}, - 'structural_type': 'd3m.container.dataset.Dataset', - 'version': '4.0.0', - }, - }, - { - 'selector': ['authors'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, - { - 'selector': ['authors', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 2, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - } - }, - }, - { - 'selector': ['authors', '__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'id', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['authors', '__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'name', - 'semantic_types': [ - 'http://schema.org/Text', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['codes'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, - { - 'selector': ['codes', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - } - }, - }, - { - 'selector': ['codes', '__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'code', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['codes', 
'__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'name', - 'semantic_types': [ - 'http://schema.org/Text', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['codes', '__ALL_ELEMENTS__', 2], - 'metadata': { - 'foreign_key': {'column_index': 0, 'resource_id': 'authors', 'type': 'COLUMN'}, - 'name': 'author', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['learningData'], - 'metadata': { - 'dimension': { - 'length': 45, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint', - ], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 5, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - } - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__', 1], - 'metadata': { - 'foreign_key': {'column_name': 'code', 'resource_id': 'codes', 'type': 'COLUMN'}, - 'name': 'code', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'key', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__', 3], - 'metadata': { - 'name': 'year', - 'semantic_types': [ - 'http://schema.org/DateTime', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__', 4], - 'metadata': { - 'name': 'value', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['values'], - 'metadata': { - 'dimension': { - 'length': 64, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, - { - 'selector': ['values', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 4, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - } - }, - }, - { - 'selector': ['values', '__ALL_ELEMENTS__', 0], - 'metadata': { - 'foreign_key': {'column_name': 'code', 'resource_id': 'codes', 'type': 'COLUMN'}, - 'name': 'code', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 
'structural_type': 'str', - }, - }, - { - 'selector': ['values', '__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'key', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['values', '__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'year', - 'semantic_types': [ - 'http://schema.org/DateTime', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['values', '__ALL_ELEMENTS__', 3], - 'metadata': { - 'name': 'value', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - ], - ) - - def _test_metadata_after(self, metadata, dataset_doc_path): - self.maxDiff = None - - self.assertEqual( - test_utils.convert_through_json(metadata), - [ - { - 'selector': [], - 'metadata': { - 'description': 'A synthetic dataset trying to be similar to a database dump, with tables with different relations between them.', - 'digest': '68c435c6ba9a1c419c79507275c0d5710786dfe481e48f35591d87a7dbf5bb1a', - 'dimension': { - 'length': 4, - 'name': 'resources', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'], - }, - 'id': 'database_dataset_1', - 'location_uris': [ - 'file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path), - ], - 'name': 'A dataset simulating a database dump', - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - 'source': {'license': 'CC', 'redacted': False}, - 'structural_type': 'd3m.container.dataset.Dataset', - 'version': '4.0.0', - }, - }, - { - 'selector': ['authors'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, - { - 'selector': ['authors', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 2, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - } - }, - }, - { - 'selector': ['authors', '__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'id', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['authors', '__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'name', - 'semantic_types': [ - 'http://schema.org/Text', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['codes'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, - { - 'selector': ['codes', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - } - }, - }, - { - 'selector': ['codes', '__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'code', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 
'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['codes', '__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'name', - 'semantic_types': [ - 'http://schema.org/Text', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['codes', '__ALL_ELEMENTS__', 2], - 'metadata': { - 'foreign_key': {'column_index': 0, 'resource_id': 'authors', 'type': 'COLUMN'}, - 'name': 'author', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['learningData'], - 'metadata': { - 'dimension': { - 'length': 45, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint', - ], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 5, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - } - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__', 1], - 'metadata': { - 'foreign_key': {'column_index': 0, 'column_name': '__NO_VALUE__', 'resource_id': 'codes', 'type': 'COLUMN'}, - 'name': 'code', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'key', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__', 3], - 'metadata': { - 'name': 'year', - 'semantic_types': [ - 'http://schema.org/DateTime', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['learningData', '__ALL_ELEMENTS__', 4], - 'metadata': { - 'name': 'value', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['values'], - 'metadata': { - 'dimension': { - 'length': 64, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, - { - 'selector': ['values', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 4, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - } - }, - }, - { - 'selector': ['values', '__ALL_ELEMENTS__', 0], - 'metadata': { - 'foreign_key': {'column_index': 0, 'column_name': '__NO_VALUE__', 'resource_id': 'codes', 'type': 'COLUMN'}, - 
'name': 'code', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['values', '__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'key', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['values', '__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'year', - 'semantic_types': [ - 'http://schema.org/DateTime', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - { - 'selector': ['values', '__ALL_ELEMENTS__', 3], - 'metadata': { - 'name': 'value', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - 'structural_type': 'str', - }, - }, - ], - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_normalize_graphs.py b/common-primitives/tests/test_normalize_graphs.py deleted file mode 100644 index e6eeb8d..0000000 --- a/common-primitives/tests/test_normalize_graphs.py +++ /dev/null @@ -1,207 +0,0 @@ -import os -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import normalize_graphs, denormalize, dataset_map, column_parser, normalize_column_references, simple_profiler - -import utils as test_utils - - -class NormalizeGraphsPrimitiveTestCase(unittest.TestCase): - def _parse_columns(self, dataset): - hyperparams_class = dataset_map.DataFrameDatasetMapPrimitive.metadata.get_hyperparams() - - primitive = dataset_map.DataFrameDatasetMapPrimitive( - # We have to make an instance of the primitive ourselves. - hyperparams=hyperparams_class.defaults().replace({ - 'primitive': column_parser.ColumnParserPrimitive( - hyperparams=column_parser.ColumnParserPrimitive.metadata.get_hyperparams().defaults(), - ), - 'resources': 'all', - }), - - ) - - return primitive.produce(inputs=dataset).value - - def _normalize_column_references(self, dataset): - hyperparams_class = normalize_column_references.NormalizeColumnReferencesPrimitive.metadata.get_hyperparams() - - primitive = normalize_column_references.NormalizeColumnReferencesPrimitive( - hyperparams=hyperparams_class.defaults(), - ) - - return primitive.produce(inputs=dataset).value - - def _get_dataset_1(self): - dataset_doc_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'graph_dataset_1', 'datasetDoc.json') - ) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. 
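# The _normalize_column_references helper above wraps the primitive whose
# effect the previous test file spells out: a foreign_key recorded by
# column_name is rewritten to a positional column_index, with the name
# masked out as NO_VALUE. A rough standalone sketch over the JSON form of
# the metadata (normalize_foreign_key is hypothetical, for illustration):
def normalize_foreign_key(foreign_key, referenced_column_names):
    # referenced_column_names: column names of the referenced resource,
    # e.g. ['code', 'name', 'author'] for the 'codes' resource.
    if foreign_key.get('type') == 'COLUMN' and 'column_name' in foreign_key:
        return {
            'type': 'COLUMN',
            'resource_id': foreign_key['resource_id'],
            'column_index': referenced_column_names.index(foreign_key['column_name']),
            'column_name': '__NO_VALUE__',  # masked, as in the expected metadata
        }
    return foreign_key

# normalize_foreign_key({'column_name': 'code', 'resource_id': 'codes', 'type': 'COLUMN'},
#                       ['code', 'name', 'author'])
# returns {'type': 'COLUMN', 'resource_id': 'codes', 'column_index': 0,
#          'column_name': '__NO_VALUE__'}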
- dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 2), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 2), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 2), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - metadata_before = dataset.metadata.to_internal_json_structure() - - normalized_dataset = self._normalize_column_references(dataset) - - hyperparams_class = normalize_graphs.NormalizeGraphsPrimitive.metadata.get_hyperparams() - - primitive = normalize_graphs.NormalizeGraphsPrimitive( - hyperparams=hyperparams_class.defaults(), - ) - - normalized_dataset = primitive.produce(inputs=normalized_dataset).value - - hyperparams_class = dataset_map.DataFrameDatasetMapPrimitive.metadata.get_hyperparams() - - primitive = dataset_map.DataFrameDatasetMapPrimitive( - # We have to make an instance of the primitive ourselves. - hyperparams=hyperparams_class.defaults().replace({ - 'primitive': simple_profiler.SimpleProfilerPrimitive( - hyperparams=simple_profiler.SimpleProfilerPrimitive.metadata.get_hyperparams().defaults().replace({ - 'detect_semantic_types': [ - 'http://schema.org/Boolean', 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'http://schema.org/Integer', 'http://schema.org/Float', 'http://schema.org/Text', - 'https://metadata.datadrivendiscovery.org/types/FloatVector', 'http://schema.org/DateTime', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/Time', - 'https://metadata.datadrivendiscovery.org/types/UnknownType', - ], - }), - ), - 'resources': 'all', - }), - - ) - - primitive.set_training_data(inputs=normalized_dataset) - primitive.fit() - normalized_dataset = primitive.produce(inputs=normalized_dataset).value - - normalized_dataset = self._parse_columns(normalized_dataset) - - hyperparams_class = denormalize.DenormalizePrimitive.metadata.get_hyperparams() - - primitive = denormalize.DenormalizePrimitive( - hyperparams=hyperparams_class.defaults(), - ) - - normalized_dataset = primitive.produce(inputs=normalized_dataset).value - - # To make metadata match in recorded structural types. - normalized_dataset.metadata = normalized_dataset.metadata.generate(normalized_dataset) - - self.assertEqual(metadata_before, dataset.metadata.to_internal_json_structure()) - - return normalized_dataset - - def _get_dataset_2(self): - dataset_doc_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'graph_dataset_2', 'datasetDoc.json') - ) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. 
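# The helpers above lean on a pattern worth calling out: the dataset_map
# primitive takes another *primitive instance* as a hyper-parameter and
# applies it to every resource in the Dataset. A condensed sketch of that
# wiring, following _parse_columns above (map_over_resources is a
# hypothetical name; for a primitive that needs fitting, such as
# SimpleProfilerPrimitive, the tests also call set_training_data and fit):
def map_over_resources(dataset, inner_primitive):
    from common_primitives import dataset_map

    hyperparams_class = dataset_map.DataFrameDatasetMapPrimitive.metadata.get_hyperparams()
    primitive = dataset_map.DataFrameDatasetMapPrimitive(
        hyperparams=hyperparams_class.defaults().replace({
            'primitive': inner_primitive,  # applied to each resource in turn
            'resources': 'all',
        }),
    )
    return primitive.produce(inputs=dataset).value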
- dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 4), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - metadata_before = dataset.metadata.to_internal_json_structure() - - normalized_dataset = self._normalize_column_references(dataset) - - hyperparams_class = normalize_graphs.NormalizeGraphsPrimitive.metadata.get_hyperparams() - - primitive = normalize_graphs.NormalizeGraphsPrimitive( - hyperparams=hyperparams_class.defaults(), - ) - - normalized_dataset = primitive.produce(inputs=normalized_dataset).value - - normalized_dataset = self._parse_columns(normalized_dataset) - - # To make metadata match in recorded structural types. - normalized_dataset.metadata = normalized_dataset.metadata.generate(normalized_dataset) - - self.assertEqual(metadata_before, dataset.metadata.to_internal_json_structure()) - - return normalized_dataset - - def test_basic(self): - self.maxDiff = None - - dataset_1 = self._get_dataset_1() - dataset_2 = self._get_dataset_2() - - # Making some changes to make resulting datasets the same. - dataset_2['G1_edges'] = dataset_2['edgeList'] - del dataset_2['edgeList'] - - dataset_2.metadata = dataset_2.metadata.copy_to(dataset_2.metadata, ('edgeList',), ('G1_edges',)) - dataset_2.metadata = dataset_2.metadata.remove(('edgeList',), recursive=True) - - for field in ['description', 'digest', 'id', 'location_uris', 'name']: - dataset_1.metadata = dataset_1.metadata.update((), {field: metadata_base.NO_VALUE}) - dataset_2.metadata = dataset_2.metadata.update((), {field: metadata_base.NO_VALUE}) - - dataset_1_metadata = test_utils.effective_metadata(dataset_1.metadata) - dataset_2_metadata = test_utils.effective_metadata(dataset_2.metadata) - - # Removing an ALL_ELEMENTS selector which does not really apply to any element anymore - # (it is overridden by more specific selectors). 
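# Background for the deletion below: in d3m metadata, values set under a
# specific selector shadow the same fields set under ALL_ELEMENTS, so an
# ALL_ELEMENTS entry whose elements all carry more specific entries has no
# remaining effect. A toy illustration, assuming standard d3m behaviour:
from d3m import container
from d3m.metadata import base as metadata_base

toy = container.DataFrame({'a': [1]}, generate_metadata=True)
toy.metadata = toy.metadata.update((metadata_base.ALL_ELEMENTS, 0), {'name': 'generic'})
toy.metadata = toy.metadata.update((0, 0), {'name': 'specific'})
assert toy.metadata.query((0, 0))['name'] == 'specific'  # ALL_ELEMENTS value is shadowed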
- del dataset_1_metadata[3] - - self.assertEqual(dataset_1_metadata, dataset_2_metadata) - - self.assertCountEqual(dataset_1.keys(), dataset_2.keys()) - - for resource_id in dataset_1.keys(): - self.assertTrue(dataset_1[resource_id].equals(dataset_2[resource_id]), resource_id) - - def test_idempotent_dataset_1(self): - dataset = self._get_dataset_1() - - hyperparams_class = normalize_graphs.NormalizeGraphsPrimitive.metadata.get_hyperparams() - - primitive = normalize_graphs.NormalizeGraphsPrimitive( - hyperparams=hyperparams_class.defaults(), - ) - - normalized_dataset = primitive.produce(inputs=dataset).value - - self.assertEqual(utils.to_json_structure(dataset.metadata.to_internal_simple_structure()), normalized_dataset.metadata.to_internal_json_structure()) - - self.assertCountEqual(dataset.keys(), normalized_dataset.keys()) - - for resource_id in dataset.keys(): - self.assertTrue(dataset[resource_id].equals(normalized_dataset[resource_id]), resource_id) - - def test_idempotent_dataset_2(self): - dataset = self._get_dataset_2() - - hyperparams_class = normalize_graphs.NormalizeGraphsPrimitive.metadata.get_hyperparams() - - primitive = normalize_graphs.NormalizeGraphsPrimitive( - hyperparams=hyperparams_class.defaults(), - ) - - normalized_dataset = primitive.produce(inputs=dataset).value - - self.assertEqual(dataset.metadata.to_internal_json_structure(), normalized_dataset.metadata.to_internal_json_structure()) - - self.assertCountEqual(dataset.keys(), normalized_dataset.keys()) - - for resource_id in dataset.keys(): - self.assertTrue(dataset[resource_id].equals(normalized_dataset[resource_id]), resource_id) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_numeric_range_filter.py b/common-primitives/tests/test_numeric_range_filter.py deleted file mode 100644 index df340af..0000000 --- a/common-primitives/tests/test_numeric_range_filter.py +++ /dev/null @@ -1,143 +0,0 @@ -import unittest -import os - -from common_primitives import numeric_range_filter -from d3m import container - -import utils as test_utils - - -class NumericRangeFilterPrimitiveTestCase(unittest.TestCase): - def test_inclusive_strict(self): - # load the iris dataset - dataset = test_utils.load_iris_metadata() - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = numeric_range_filter.NumericRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'min': 6.5, - 'max': 6.7, - 'strict': True, - 'inclusive': True - }) - filter_primitive = numeric_range_filter.NumericRangeFilterPrimitive(hyperparams=hp) - new_dataframe = filter_primitive.produce(inputs=resource).value - - self.assertGreater(new_dataframe['sepalLength'].astype(float).min(), 6.5) - self.assertLess(new_dataframe['sepalLength'].astype(float).max(), 6.7) - - def test_inclusive_permissive(self): - # load the iris dataset - dataset = test_utils.load_iris_metadata() - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = numeric_range_filter.NumericRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'min': 6.5, - 'max': 6.7, - 'strict': False, - 'inclusive': True - }) - filter_primitive = numeric_range_filter.NumericRangeFilterPrimitive(hyperparams=hp) - new_dataframe = filter_primitive.produce(inputs=resource).value - - self.assertGreaterEqual(new_dataframe['sepalLength'].astype(float).min(), 6.5) - self.assertLessEqual(new_dataframe['sepalLength'].astype(float).max(), 6.7) - - def 
test_exclusive_strict(self): - # load the iris dataset - dataset = test_utils.load_iris_metadata() - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = numeric_range_filter \ - .NumericRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'min': 6.5, - 'max': 6.7, - 'strict': True, - 'inclusive': False - }) - filter_primitive = numeric_range_filter.NumericRangeFilterPrimitive(hyperparams=hp) - new_dataframe = filter_primitive.produce(inputs=resource).value - - self.assertEqual( - len(new_dataframe.loc[ - (new_dataframe['sepalLength'].astype(float) >= 6.5) & - (new_dataframe['sepalLength'].astype(float) <= 6.7)]), 0) - - def test_exclusive_permissive(self): - # load the iris dataset - dataset = test_utils.load_iris_metadata() - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = numeric_range_filter \ - .NumericRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'min': 6.5, - 'max': 6.7, - 'strict': False, - 'inclusive': False - }) - filter_primitive = numeric_range_filter.NumericRangeFilterPrimitive(hyperparams=hp) - new_dataframe = filter_primitive.produce(inputs=resource).value - - self.assertEqual( - len(new_dataframe.loc[ - (new_dataframe['sepalLength'].astype(float) > 6.5) & - (new_dataframe['sepalLength'].astype(float) < 6.7)]), 0) - - def test_row_metadata_removal(self): - # load the iris dataset - dataset = test_utils.load_iris_metadata() - - # add metadata for rows 0 and 5 - dataset.metadata = dataset.metadata.update(('learningData', 0), {'a': 0}) - dataset.metadata = dataset.metadata.update(('learningData', 5), {'b': 1}) - - resource = test_utils.get_dataframe(dataset) - - # apply filter that removes rows 1 through 4 - filter_hyperparams_class = numeric_range_filter.NumericRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 0, - 'min': 1, - 'max': 4, - 'strict': True, - 'inclusive': False - }) - filter_primitive = numeric_range_filter.NumericRangeFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - # verify that the length is correct - self.assertEqual(len(new_df), new_df.metadata.query(())['dimension']['length']) - - # verify that the rows were re-indexed in the metadata - self.assertEqual(new_df.metadata.query((0,))['a'], 0) - self.assertEqual(new_df.metadata.query((1,))['b'], 1) - self.assertFalse('b' in new_df.metadata.query((5,))) - - def test_bad_type_handling(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'timeseries_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = numeric_range_filter \ - .NumericRangeFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'min': 6.5, - 'max': 6.7, - 'strict': False, - 'inclusive': False - }) - filter_primitive = numeric_range_filter.NumericRangeFilterPrimitive(hyperparams=hp) - with self.assertRaises(ValueError): - filter_primitive.produce(inputs=resource) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_one_hot_maker.py b/common-primitives/tests/test_one_hot_maker.py deleted file mode 100644 index 245fd70..0000000 --- a/common-primitives/tests/test_one_hot_maker.py +++ /dev/null @@ -1,516 +0,0 @@ -import os -import time 
-import unittest -import numpy as np -import pickle -from d3m import container, exceptions, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, extract_columns_semantic_types, one_hot_maker, column_parser - - -def _copy_target_as_categorical_feature(attributes, targets): - attributes = targets.append_columns(attributes) - for column_name in targets.columns.values: - column_mask = attributes.columns.get_loc(column_name) - if isinstance(column_mask, int): - column_index = column_mask - else: - column_index = np.where(column_mask)[0][-1].item() - attributes.metadata = attributes.metadata.remove_semantic_type( - (metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget') - attributes.metadata = attributes.metadata.remove_semantic_type( - (metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/Target') - attributes.metadata = attributes.metadata.remove_semantic_type( - (metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - attributes.metadata = attributes.metadata.add_semantic_type( - (metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/CategoricalData') - attributes.metadata = attributes.metadata.add_semantic_type( - (metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/Attribute') - attributes.metadata = attributes.metadata.update_column(column_index, - {'custom_metadata': metadata_base.NO_VALUE}) - return attributes - - -def _get_iris(): - dataset_doc_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = \ - dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - dataframe = primitive.produce(inputs=dataset).value - - return dataframe - - -def _get_iris_columns(): - dataframe = _get_iris() - - # We set custom metadata on columns. - for column_index in range(1, 5): - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'attributes'}) - for column_index in range(5, 6): - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'targets'}) - - # We set semantic types like runtime would. - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/Target') - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataframe.metadata = dataframe.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/Attribute') - - # Parsing. 
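# Note on style: this file looks the Hyperparams class up through
# metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
# (as the parsing block below does), while earlier files use the shorter
# metadata.get_hyperparams(). Assuming current d3m behaviour, both name the
# same class:
from common_primitives import column_parser

via_query = column_parser.ColumnParserPrimitive.metadata.query()[
    'primitive_code']['class_type_arguments']['Hyperparams']
via_helper = column_parser.ColumnParserPrimitive.metadata.get_hyperparams()
assert via_query is via_helper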
- hyperparams_class = \ - column_parser.ColumnParserPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - hyperparams_class = \ - extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - {'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Attribute',)})) - attributes = primitive.produce(inputs=dataframe).value - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - {'semantic_types': ('https://metadata.datadrivendiscovery.org/types/SuggestedTarget',)})) - targets = primitive.produce(inputs=dataframe).value - - return dataframe, attributes, targets - - -class OneHotTestCase(unittest.TestCase): - attributes: container.DataFrame = None - excp_attributes: container.DataFrame = None - targets: container.DataFrame = None - dataframe: container.DataFrame = None - unseen_species: str = 'Unseen-Species' - missing_value: float = np.NaN - - @classmethod - def setUpClass(cls) -> None: - cls.dataframe, cls.attributes, cls.targets = _get_iris_columns() - cls.attributes = _copy_target_as_categorical_feature(attributes=cls.attributes, targets=cls.targets) - cls.excp_attributes = cls.attributes.copy() - - def tearDown(self): - self.attributes.iloc[:3, 0] = 'Iris-setosa' - self.excp_attributes.iloc[:3, 0] = 'Iris-setosa' - - def test_fit_produce(self): - attributes = _copy_target_as_categorical_feature(self.attributes, - self.targets.rename(columns={'species': '2-species'})) - attributes.metadata = attributes.metadata.update_column(1, { - 'name': '2-species' - }) - - hyperparams_class = \ - one_hot_maker.OneHotMakerPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = one_hot_maker.OneHotMakerPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace'})) - - primitive.set_training_data(inputs=attributes) - primitive.fit() - after_onehot = primitive.produce(inputs=attributes).value - # 1 for the original, so we remove it. 
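# Worked out, the shape assertion below: attributes enters with 6 columns
# (the two categorical species copies plus the 4 numeric measurements).
# One-hot encoding with return_result='replace' swaps each categorical
# column for its 3 indicator columns, a net gain of (3 - 1) per column,
# hence 2 * (3 - 1) + 6 = 10 columns in after_onehot.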
- self.assertEqual(after_onehot.shape[1], 2 * (len(self.targets['species'].unique()) - 1) + attributes.shape[1]) - self.assertEqual(after_onehot.shape[0], self.targets.shape[0]) - # 3 unique value for 2 (species, 2-species) 3 * 2 = 6 - self.assertTrue(all(dtype == 'uint8' for dtype in after_onehot.dtypes[:6])) - self.assertEqual(list(after_onehot.columns.values), [ - 'species.Iris-setosa', 'species.Iris-versicolor', 'species.Iris-virginica', - '2-species.Iris-setosa', '2-species.Iris-versicolor', '2-species.Iris-virginica', - 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth']) - self._test_metadata_return_replace(after_onehot.metadata) - - def test_error_unseen_categories_ignore(self): - # default(ignore) case - self.excp_attributes.iloc[0, 0] = self.unseen_species - self.excp_attributes.iloc[1, 0] = self.unseen_species + '-2' - self.excp_attributes.iloc[2, 0] = np.NaN - hyperparams_class = \ - one_hot_maker.OneHotMakerPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = one_hot_maker.OneHotMakerPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace'})) - - primitive.set_training_data(inputs=self.attributes) - primitive.fit() - one_hot_result = primitive.produce(inputs=self.excp_attributes).value - self.assertEqual(one_hot_result.shape[1], len(self.targets['species'].unique()) + self.attributes.shape[1] - 1) - self.assertEqual(one_hot_result.shape[0], self.targets.shape[0]) - self.assertTrue(all(dtype == 'uint8' for dtype in one_hot_result.dtypes[:3])) - - def test_error_unseen_categories_error(self): - # error case - self.excp_attributes.iloc[0, 0] = self.unseen_species - self.excp_attributes.iloc[1, 0] = self.unseen_species + '-2' - self.excp_attributes.iloc[2, 0] = np.NaN - hyperparams_class = \ - one_hot_maker.OneHotMakerPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = one_hot_maker.OneHotMakerPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace', 'handle_unseen': 'error'})) - - primitive.set_training_data(inputs=self.attributes) - primitive.fit() - self.assertRaises(exceptions.UnexpectedValueError, primitive.produce, inputs=self.excp_attributes) - - def test_unseen_categories_handle(self): - # handle case - self.excp_attributes.iloc[0, 0] = self.unseen_species - self.excp_attributes.iloc[1, 0] = self.unseen_species + '-2' - self.excp_attributes.iloc[2, 0] = np.NaN - hyperparams_class = \ - one_hot_maker.OneHotMakerPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = one_hot_maker.OneHotMakerPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace', 'handle_unseen': 'column'})) - - primitive.set_training_data(inputs=self.attributes) - primitive.fit() - one_hot_result = primitive.produce(inputs=self.excp_attributes).value - self.assertEqual(one_hot_result.shape[1], - len(self.targets['species'].unique()) + self.attributes.shape[1] - 1 + 1) - # unseen cell should be 1 - self.assertEqual(one_hot_result.iloc[0, 3], 1) - self.assertEqual(one_hot_result.shape[0], self.targets.shape[0]) - self.assertTrue(all(dtype == 'uint8' for dtype in one_hot_result.dtypes[:3])) - self.assertEqual(set(one_hot_result.columns.values), {'petalLength', - 'petalWidth', - 'sepalLength', - 'sepalWidth', - 'species.Iris-setosa', - 'species.Iris-versicolor', - 'species.Iris-virginica', - 'species.Unseen'}) - 
self._test_metadata_unseen_handle_return_replace(one_hot_result.metadata) - - def test_missing_value_ignore(self): - self.excp_attributes.iloc[0, 0] = self.missing_value - self.excp_attributes.iloc[1, 0] = self.missing_value - - # missing present during fit - hyperparams_class = \ - one_hot_maker.OneHotMakerPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = one_hot_maker.OneHotMakerPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace'})) - - primitive.set_training_data(inputs=self.excp_attributes) - primitive.fit() - one_hot_result = primitive.produce(inputs=self.excp_attributes).value - self.assertEqual(one_hot_result.shape[1], len(self.targets['species'].unique()) + self.attributes.shape[1] - 1) - self.assertEqual(one_hot_result.shape[0], self.targets.shape[0]) - self.assertTrue(all(dtype == 'uint8' for dtype in one_hot_result.dtypes[:3])) - self.assertEqual(set(one_hot_result.columns.values), { - 'species.Iris-setosa', 'species.Iris-versicolor', 'species.Iris-virginica', - 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'}) - - hyperparams_class = \ - one_hot_maker.OneHotMakerPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = one_hot_maker.OneHotMakerPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace'})) - - primitive.set_training_data(inputs=self.attributes) - primitive.fit() - one_hot_result = primitive.produce(inputs=self.excp_attributes).value - self.assertEqual(one_hot_result.shape[1], len(self.targets['species'].unique()) + self.attributes.shape[1] - 1) - self.assertEqual(one_hot_result.shape[0], self.targets.shape[0]) - self.assertTrue(all(dtype == 'uint8' for dtype in one_hot_result.dtypes[:3])) - self.assertEqual(set(one_hot_result.columns.values), { - 'species.Iris-setosa', 'species.Iris-versicolor', 'species.Iris-virginica', - 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'}) - - def test_missing_value_error(self): - self.excp_attributes.iloc[0, 0] = np.NaN - self.excp_attributes.iloc[1, 0] = None - # error - hyperparams_class = \ - one_hot_maker.OneHotMakerPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = one_hot_maker.OneHotMakerPrimitive( - hyperparams=hyperparams_class.defaults().replace({ - 'return_result': 'replace', - 'handle_missing_value': 'error', - })) - - primitive.set_training_data(inputs=self.excp_attributes) - self.assertRaises(exceptions.MissingValueError, primitive.fit) - - def test_missing_value_column(self): - self.excp_attributes.iloc[0, 0] = np.NaN - self.excp_attributes.iloc[1, 0] = np.NaN - self.excp_attributes.iloc[2, 0] = 'Unseen-Species' - # column - hyperparams_class = \ - one_hot_maker.OneHotMakerPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = one_hot_maker.OneHotMakerPrimitive( - hyperparams=hyperparams_class.defaults().replace({ - 'return_result': 'replace', - 'handle_missing_value': 'column', - })) - - primitive.set_training_data(inputs=self.attributes) - primitive.fit() - one_hot_result = primitive.produce(inputs=self.excp_attributes).value - self.assertEqual(one_hot_result.shape[1], - len(self.targets['species'].unique()) + 1 + self.attributes.shape[1] - 1) - self.assertEqual(one_hot_result.shape[0], self.targets.shape[0]) - self.assertTrue(all(dtype == 'uint8' for dtype in one_hot_result.dtypes[:4])) - self.assertEqual(set(one_hot_result.columns.values), 
{'petalLength', - 'petalWidth', - 'sepalLength', - 'sepalWidth', - 'species.Iris-setosa', - 'species.Iris-versicolor', - 'species.Iris-virginica', - 'species.Missing'}) - - def test_unseen_column_and_missing_value_column(self): - self.excp_attributes.iloc[0, 0] = np.NaN - self.excp_attributes.iloc[1, 0] = np.NaN - self.excp_attributes.iloc[2, 0] = 'Unseen-Species' - # column - hyperparams_class = \ - one_hot_maker.OneHotMakerPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = one_hot_maker.OneHotMakerPrimitive( - hyperparams=hyperparams_class.defaults().replace({ - 'return_result': 'replace', - 'handle_missing_value': 'column', - 'handle_unseen': 'column' - })) - - primitive.set_training_data(inputs=self.attributes) - primitive.fit() - one_hot_result = primitive.produce(inputs=self.excp_attributes).value - self.assertEqual(one_hot_result.shape[1], - len(self.targets['species'].unique()) + 2 + self.attributes.shape[1] - 1) - self.assertEqual(one_hot_result.shape[0], self.targets.shape[0]) - self.assertTrue(all(dtype == 'uint8' for dtype in one_hot_result.dtypes[:4])) - self.assertEqual(set(one_hot_result.columns.values), {'petalLength', - 'petalWidth', - 'sepalLength', - 'sepalWidth', - 'species.Iris-setosa', - 'species.Iris-versicolor', - 'species.Iris-virginica', - 'species.Missing', - 'species.Unseen'}) - - def test_pickle_unpickle(self): - hyperparams_class = \ - one_hot_maker.OneHotMakerPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = one_hot_maker.OneHotMakerPrimitive( - hyperparams=hyperparams_class.defaults().replace({ - 'return_result': 'replace', - 'handle_missing_value': 'column', - 'handle_unseen': 'column' - })) - - primitive.set_training_data(inputs=self.attributes) - primitive.fit() - - before_pickled_prediction = primitive.produce(inputs=self.attributes).value - pickle_object = pickle.dumps(primitive) - primitive = pickle.loads(pickle_object) - after_unpickled_prediction = primitive.produce(inputs=self.attributes).value - self.assertTrue(container.DataFrame.equals(before_pickled_prediction, after_unpickled_prediction)) - - def _test_metadata_unseen_handle_return_replace(self, after_onehot_metadata): - self.assertEqual(utils.to_json_structure(after_onehot_metadata.to_internal_simple_structure()), [{ - 'metadata': { - 'dimension': { - 'length': 150, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'] - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame' - }, - 'selector': [] - }, - { - 'metadata': { - 'dimension': { - 'length': 8, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'] - } - }, - 'selector': ['__ALL_ELEMENTS__'] - }, - { - 'metadata': { - 'custom_metadata': '__NO_VALUE__', - 'name': 'species.Iris-setosa', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8' - }, - 'selector': ['__ALL_ELEMENTS__', 0] - }, - { - 'metadata': { - 'custom_metadata': '__NO_VALUE__', - 'name': 'species.Iris-versicolor', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8' - }, - 'selector': ['__ALL_ELEMENTS__', 1]}, - { - 'metadata': { - 'custom_metadata': '__NO_VALUE__', - 'name': 
'species.Iris-virginica', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8' - }, - 'selector': ['__ALL_ELEMENTS__', 2]}, - { - 'metadata': {'custom_metadata': '__NO_VALUE__', - 'name': 'species.Unseen', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8'}, - 'selector': ['__ALL_ELEMENTS__', 3] - }, - { - 'metadata': { - 'custom_metadata': 'attributes', - 'name': 'sepalLength', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute' - ], - 'structural_type': 'float' - }, - 'selector': ['__ALL_ELEMENTS__', 4] - }, - { - 'metadata': { - 'custom_metadata': 'attributes', - 'name': 'sepalWidth', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute' - ], - 'structural_type': 'float' - }, - 'selector': ['__ALL_ELEMENTS__', 5] - }, - { - 'metadata': { - 'custom_metadata': 'attributes', - 'name': 'petalLength', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute' - ], - 'structural_type': 'float' - }, - 'selector': ['__ALL_ELEMENTS__', 6] - }, - { - 'metadata': { - 'custom_metadata': 'attributes', - 'name': 'petalWidth', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute' - ], - 'structural_type': 'float' - }, - 'selector': ['__ALL_ELEMENTS__', 7] - } - ]) - - def _test_metadata_return_replace(self, after_onehot_metadata): - self.assertEqual( - utils.to_json_structure(after_onehot_metadata.to_internal_simple_structure()), - [{'metadata': {'dimension': {'length': 150, - 'name': 'rows', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/TabularRow']}, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame'}, - 'selector': []}, - {'metadata': {'dimension': {'length': 10, - 'name': 'columns', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/TabularColumn']}}, - 'selector': ['__ALL_ELEMENTS__']}, - {'metadata': {'custom_metadata': '__NO_VALUE__', - 'name': 'species.Iris-setosa', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8'}, - 'selector': ['__ALL_ELEMENTS__', 0]}, - {'metadata': {'custom_metadata': '__NO_VALUE__', - 'name': 'species.Iris-versicolor', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8'}, - 'selector': ['__ALL_ELEMENTS__', 1]}, - {'metadata': {'custom_metadata': '__NO_VALUE__', - 'name': 'species.Iris-virginica', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8'}, - 'selector': ['__ALL_ELEMENTS__', 2]}, - {'metadata': {'custom_metadata': '__NO_VALUE__', - 'name': '2-species.Iris-setosa', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8'}, - 'selector': ['__ALL_ELEMENTS__', 3]}, - {'metadata': {'custom_metadata': '__NO_VALUE__', - 'name': '2-species.Iris-versicolor', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8'}, - 'selector': ['__ALL_ELEMENTS__', 4]}, - {'metadata': {'custom_metadata': '__NO_VALUE__', - 
'name': '2-species.Iris-virginica', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8'}, - 'selector': ['__ALL_ELEMENTS__', 5]}, - {'metadata': {'custom_metadata': 'attributes', - 'name': 'sepalLength', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'float'}, - 'selector': ['__ALL_ELEMENTS__', 6]}, - {'metadata': {'custom_metadata': 'attributes', - 'name': 'sepalWidth', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'float'}, - 'selector': ['__ALL_ELEMENTS__', 7]}, - {'metadata': {'custom_metadata': 'attributes', - 'name': 'petalLength', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'float'}, - 'selector': ['__ALL_ELEMENTS__', 8]}, - {'metadata': {'custom_metadata': 'attributes', - 'name': 'petalWidth', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'float'}, - 'selector': ['__ALL_ELEMENTS__', 9]}] - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_pandas_onehot_encoder.py b/common-primitives/tests/test_pandas_onehot_encoder.py deleted file mode 100644 index d7b4b30..0000000 --- a/common-primitives/tests/test_pandas_onehot_encoder.py +++ /dev/null @@ -1,178 +0,0 @@ -import unittest -import pandas as pd - -from d3m import container, utils -from common_primitives.pandas_onehot_encoder import PandasOneHotEncoderPrimitive -from d3m.metadata import base as metadata_base - -import utils as test_utils - - -class PandasOneHotEncoderPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - training = pd.DataFrame({'Name': ['Henry', 'Diane', 'Kitty', 'Peter']}) - training = container.DataFrame(training, generate_metadata=True) - training.metadata = training.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/CategoricalData',) - training.metadata = training.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Attribute',) - - testing = pd.DataFrame({'Name': ['John', 'Alex','Henry','Diane']}) - testing = container.DataFrame(testing, generate_metadata=True) - testing.metadata = testing.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/CategoricalData') - testing.metadata = testing.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/Attribute',) - testing.metadata = testing.metadata.update_column(0, { - 'custom_metadata': 42, - }) - - Hyperparams = PandasOneHotEncoderPrimitive.metadata.get_hyperparams() - ht = PandasOneHotEncoderPrimitive(hyperparams=Hyperparams.defaults()) - - ht.set_training_data(inputs=training) - ht.fit() - - result_df = ht.produce(inputs=testing).value - - self.assertEqual(list(result_df.columns), ['Name_Diane', 'Name_Henry', 'Name_Kitty', 'Name_Peter']) - - self.assertEqual(list(result_df['Name_Henry']), [0, 0, 1, 0]) - self.assertEqual(list(result_df['Name_Diane']), [0, 0, 0, 1]) - self.assertEqual(list(result_df['Name_Kitty']), [0, 0, 0, 0]) - self.assertEqual(list(result_df['Name_Peter']), [0, 0, 0, 0]) - - 
self.assertEqual(test_utils.convert_metadata(utils.to_json_structure(result_df.metadata.to_internal_simple_structure())), [{ - 'selector': [], - 'metadata': { - 'dimension': { - 'length': 4, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 4, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'custom_metadata': 42, - 'name': 'Name_Diane', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'custom_metadata': 42, - 'name': 'Name_Henry', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'custom_metadata': 42, - 'name': 'Name_Kitty', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 3], - 'metadata': { - 'custom_metadata': 42, - 'name': 'Name_Peter', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8', - }, - }]) - - ht = PandasOneHotEncoderPrimitive(hyperparams=Hyperparams.defaults().replace({ - 'dummy_na': True, - })) - - ht.set_training_data(inputs=training) - ht.fit() - - result_df = ht.produce(inputs=testing).value - - self.assertEqual(list(result_df.columns), ['Name_Diane', 'Name_Henry', 'Name_Kitty', 'Name_Peter', 'Name_nan']) - - self.assertEqual(list(result_df['Name_Henry']), [0, 0, 1, 0]) - self.assertEqual(list(result_df['Name_Diane']), [0, 0, 0, 1]) - self.assertEqual(list(result_df['Name_Kitty']), [0, 0, 0, 0]) - self.assertEqual(list(result_df['Name_Peter']), [0, 0, 0, 0]) - self.assertEqual(list(result_df['Name_nan']), [1, 1, 0, 0]) - - self.assertEqual(test_utils.convert_metadata(utils.to_json_structure(result_df.metadata.to_internal_simple_structure())), [{ - 'selector': [], - 'metadata': { - 'dimension': { - 'length': 4, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 5, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'custom_metadata': 42, - 'name': 'Name_Diane', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'custom_metadata': 42, - 'name': 'Name_Henry', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 
2], - 'metadata': { - 'custom_metadata': 42, - 'name': 'Name_Kitty', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 3], - 'metadata': { - 'custom_metadata': 42, - 'name': 'Name_Peter', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 4], - 'metadata': { - 'custom_metadata': 42, - 'name': 'Name_nan', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.uint8', - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_random_forest.py b/common-primitives/tests/test_random_forest.py deleted file mode 100644 index 5daee9c..0000000 --- a/common-primitives/tests/test_random_forest.py +++ /dev/null @@ -1,701 +0,0 @@ -import logging -import os -import pickle -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, extract_columns_semantic_types, random_forest, column_parser - - -class RandomForestTestCase(unittest.TestCase): - def _get_iris(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - dataframe = primitive.produce(inputs=dataset).value - - return dataframe - - def _get_iris_columns(self): - dataframe = self._get_iris() - - # We set custom metadata on columns. - for column_index in range(1, 5): - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'attributes'}) - for column_index in range(5, 6): - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'targets'}) - - # We set semantic types like runtime would. - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataframe.metadata = dataframe.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - # Parsing. 
- hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - hyperparams_class = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.get_hyperparams() - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Attribute',)})) - attributes = primitive.produce(inputs=dataframe).value - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({'semantic_types': ('https://metadata.datadrivendiscovery.org/types/SuggestedTarget',)})) - targets = primitive.produce(inputs=dataframe).value - - return dataframe, attributes, targets - - def test_single_target(self): - dataframe, attributes, targets = self._get_iris_columns() - - self.assertEqual(list(targets.columns), ['species']) - - hyperparams_class = random_forest.RandomForestClassifierPrimitive.metadata.get_hyperparams() - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - predictions = primitive.produce(inputs=attributes).value - - self.assertEqual(list(predictions.columns), ['species']) - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - self._test_single_target_metadata(predictions.metadata) - - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(list(samples[0].columns), ['species']) - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=attributes, outputs=targets).value - - self.assertEqual(list(log_likelihoods.columns), ['species']) - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=attributes, outputs=targets).value - - self.assertEqual(list(log_likelihood.columns), ['species']) - - self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -3.72702785304761) - 
self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - feature_importances = primitive.produce_feature_importances().value - - self.assertEqual(list(feature_importances), ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth']) - self.assertEqual(feature_importances.metadata.query_column(0)['name'], 'sepalLength') - self.assertEqual(feature_importances.metadata.query_column(1)['name'], 'sepalWidth') - self.assertEqual(feature_importances.metadata.query_column(2)['name'], 'petalLength') - self.assertEqual(feature_importances.metadata.query_column(3)['name'], 'petalWidth') - - self.assertEqual(feature_importances.values.tolist(), [[0.09090795402103087, - 0.024531041234715757, - 0.46044473961715215, - 0.42411626512710127, - ]]) - - def _test_single_target_metadata(self, predictions_metadata): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 1, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'https://metadata.datadrivendiscovery.org/types/Target', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }] - - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), expected_metadata) - - def test_multiple_targets(self): - dataframe, attributes, targets = self._get_iris_columns() - - targets = targets.append_columns(targets) - - self.assertEqual(list(targets.columns), ['species', 'species']) - - hyperparams_class = random_forest.RandomForestClassifierPrimitive.metadata.get_hyperparams() - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - predictions = primitive.produce(inputs=attributes).value - - self.assertEqual(list(predictions.columns), ['species', 'species']) - - self.assertEqual(predictions.shape, (150, 2)) - for column_index in range(2): - self.assertEqual(predictions.iloc[0, column_index], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(column_index)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(column_index)['custom_metadata'], 'targets') - - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(list(samples[0].columns), ['species', 'species']) - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 2)) - for 
column_index in range(2): - self.assertEqual(samples[0].iloc[0, column_index], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(column_index)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(column_index)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=attributes, outputs=targets).value - - self.assertEqual(list(log_likelihoods.columns), ['species', 'species']) - - self.assertEqual(log_likelihoods.shape, (150, 2)) - for column_index in range(2): - self.assertEqual(log_likelihoods.metadata.query_column(column_index)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=attributes, outputs=targets).value - - self.assertEqual(list(log_likelihood.columns), ['species', 'species']) - - self.assertEqual(log_likelihood.shape, (1, 2)) - for column_index in range(2): - self.assertAlmostEqual(log_likelihood.iloc[0, column_index], -3.72702785304761) - self.assertEqual(log_likelihoods.metadata.query_column(column_index)['name'], 'species') - - feature_importances = primitive.produce_feature_importances().value - - self.assertEqual(list(feature_importances), ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth']) - self.assertEqual(feature_importances.metadata.query_column(0)['name'], 'sepalLength') - self.assertEqual(feature_importances.metadata.query_column(1)['name'], 'sepalWidth') - self.assertEqual(feature_importances.metadata.query_column(2)['name'], 'petalLength') - self.assertEqual(feature_importances.metadata.query_column(3)['name'], 'petalWidth') - - self.assertEqual(feature_importances.values.tolist(), [[0.09090795402103087, - 0.024531041234715757, - 0.46044473961715215, - 0.42411626512710127, - ]]) - - def test_semantic_types(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = random_forest.RandomForestClassifierPrimitive.metadata.get_hyperparams() - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(list(predictions.columns), ['species']) - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - samples = primitive.sample(inputs=dataframe).value - - self.assertEqual(list(samples[0].columns), ['species']) - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), 
'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=dataframe, outputs=dataframe).value - - self.assertEqual(list(log_likelihoods.columns), ['species']) - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=dataframe, outputs=dataframe).value - - self.assertEqual(list(log_likelihood.columns), ['species']) - - self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -3.72702785304761) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - feature_importances = primitive.produce_feature_importances().value - - self.assertEqual(list(feature_importances), ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth']) - self.assertEqual(feature_importances.metadata.query_column(0)['name'], 'sepalLength') - self.assertEqual(feature_importances.metadata.query_column(1)['name'], 'sepalWidth') - self.assertEqual(feature_importances.metadata.query_column(2)['name'], 'petalLength') - self.assertEqual(feature_importances.metadata.query_column(3)['name'], 'petalWidth') - - self.assertEqual(feature_importances.values.tolist(), [[0.09090795402103087, - 0.024531041234715757, - 0.46044473961715215, - 0.42411626512710127, - ]]) - - def test_return_append(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = random_forest.RandomForestClassifierPrimitive.metadata.get_hyperparams() - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults()) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'sepalLength', - 'sepalWidth', - 'petalLength', - 'petalWidth', - 'species', - 'species', - ]) - - self.assertEqual(predictions.shape, (150, 7)) - self.assertEqual(predictions.iloc[0, 6], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 6), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 6), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(6)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(6)['custom_metadata'], 'targets') - - self._test_return_append_metadata(predictions.metadata) - - def _test_return_append_metadata(self, predictions_metadata): - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': 
['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 7, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'sepalLength', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'sepalWidth', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 3], - 'metadata': { - 'name': 'petalLength', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 4], - 'metadata': { - 'name': 'petalWidth', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 5], - 'metadata': { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'https://metadata.datadrivendiscovery.org/types/Target', 'https://metadata.datadrivendiscovery.org/types/TrueTarget'], - 'custom_metadata': 'targets', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 6], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'https://metadata.datadrivendiscovery.org/types/Target', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }]) - - def test_return_new(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = random_forest.RandomForestClassifierPrimitive.metadata.get_hyperparams() - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults().replace({'return_result': 'new'})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'species', - ]) - - self.assertEqual(predictions.shape, (150, 2)) - self.assertEqual(predictions.iloc[0, 1], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - self._test_return_new_metadata(predictions.metadata) - - def 
_test_return_new_metadata(self, predictions_metadata): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 2, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'https://metadata.datadrivendiscovery.org/types/Target', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }] - - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), expected_metadata) - - def test_return_replace(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = random_forest.RandomForestClassifierPrimitive.metadata.get_hyperparams() - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace'})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'species', - 'species', - ]) - - self.assertEqual(predictions.shape, (150, 3)) - self.assertEqual(predictions.iloc[0, 1], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - self._test_return_replace_metadata(predictions.metadata) - - def test_get_set_params(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = random_forest.RandomForestClassifierPrimitive.metadata.get_hyperparams() - primitive = random_forest.RandomForestClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - before_set_prediction = primitive.produce(inputs=attributes).value - params = primitive.get_params() - primitive.set_params(params=params) - after_set_prediction = primitive.produce(inputs=attributes).value - self.assertTrue(container.DataFrame.equals(before_set_prediction, after_set_prediction)) - - def test_pickle_unpickle(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = 
random_forest.RandomForestClassifierPrimitive.metadata.get_hyperparams() - primitive = random_forest.RandomForestClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - before_pickled_prediction = primitive.produce(inputs=attributes).value - pickle_object = pickle.dumps(primitive) - primitive = pickle.loads(pickle_object) - after_unpickled_prediction = primitive.produce(inputs=attributes).value - self.assertTrue(container.DataFrame.equals(before_pickled_prediction, after_unpickled_prediction)) - - def _test_return_replace_metadata(self, predictions_metadata): - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'https://metadata.datadrivendiscovery.org/types/Target', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', 'https://metadata.datadrivendiscovery.org/types/Target', 'https://metadata.datadrivendiscovery.org/types/TrueTarget'], - 'custom_metadata': 'targets', - }, - }]) - - def test_empty_data(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = random_forest.RandomForestClassifierPrimitive.metadata.get_hyperparams() - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults()) - - just_index_dataframe = dataframe.select_columns([0]) - no_attributes_dataframe = dataframe.select_columns([0, 5]) - - primitive.set_training_data(inputs=just_index_dataframe, outputs=just_index_dataframe) - - with self.assertRaises(Exception): - primitive.fit() - - primitive.set_training_data(inputs=no_attributes_dataframe, outputs=no_attributes_dataframe) - - with self.assertRaises(Exception): - primitive.fit() - - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'error_on_no_columns': False, - 'return_result': 'replace', - })) - - primitive.set_training_data(inputs=just_index_dataframe, outputs=just_index_dataframe) - - with self.assertLogs(primitive.logger, 
level=logging.WARNING) as cm: - primitive.fit() - - self.assertEqual(len(cm.records), 2) - self.assertEqual(cm.records[0].msg, "No inputs columns.") - self.assertEqual(cm.records[1].msg, "No outputs columns.") - - # Test pickling. - pickle_object = pickle.dumps(primitive) - pickle.loads(pickle_object) - - with self.assertLogs(primitive.logger, level=logging.WARNING) as cm: - predictions = primitive.produce(inputs=just_index_dataframe).value - - self.assertEqual(len(cm.records), 1) - self.assertEqual(cm.records[0].msg, "No inputs columns.") - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - ]) - self.assertEqual(predictions.shape, (150, 1)) - - self.assertEqual(predictions.metadata.to_internal_json_structure(), just_index_dataframe.metadata.to_internal_json_structure()) - - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'error_on_no_columns': False, - 'return_result': 'replace', - })) - - primitive.set_training_data(inputs=no_attributes_dataframe, outputs=no_attributes_dataframe) - - with self.assertLogs(primitive.logger, level=logging.WARNING) as cm: - primitive.fit() - - self.assertEqual(len(cm.records), 1) - self.assertEqual(cm.records[0].msg, "No inputs columns.") - - # Test pickling. - pickle_object = pickle.dumps(primitive) - pickle.loads(pickle_object) - - with self.assertLogs(primitive.logger, level=logging.WARNING) as cm: - predictions = primitive.produce(inputs=no_attributes_dataframe).value - - self.assertEqual(len(cm.records), 1) - self.assertEqual(cm.records[0].msg, "No inputs columns.") - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'species', - ]) - self.assertEqual(predictions.shape, (150, 2)) - - self.assertEqual(predictions.metadata.to_internal_json_structure(), no_attributes_dataframe.metadata.to_internal_json_structure()) - - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'error_on_no_columns': False, - 'return_result': 'new', - })) - - primitive.set_training_data(inputs=no_attributes_dataframe, outputs=no_attributes_dataframe) - - with self.assertLogs(primitive.logger, level=logging.WARNING) as cm: - primitive.fit() - - self.assertEqual(len(cm.records), 1) - self.assertEqual(cm.records[0].msg, "No inputs columns.") - - # Test pickling. - pickle_object = pickle.dumps(primitive) - pickle.loads(pickle_object) - - with self.assertLogs(primitive.logger, level=logging.WARNING) as cm: - with self.assertRaises(ValueError): - primitive.produce(inputs=no_attributes_dataframe) - - self.assertEqual(len(cm.records), 1) - self.assertEqual(cm.records[0].msg, "No inputs columns.") - - primitive = random_forest.RandomForestClassifierPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'error_on_no_columns': False, - 'return_result': 'append', - })) - - primitive.set_training_data(inputs=no_attributes_dataframe, outputs=no_attributes_dataframe) - - with self.assertLogs(primitive.logger, level=logging.WARNING) as cm: - primitive.fit() - - # Test pickling. 
- pickle_object = pickle.dumps(primitive) - pickle.loads(pickle_object) - - self.assertEqual(len(cm.records), 1) - self.assertEqual(cm.records[0].msg, "No inputs columns.") - - with self.assertLogs(primitive.logger, level=logging.WARNING) as cm: - predictions = primitive.produce(inputs=no_attributes_dataframe).value - - self.assertEqual(len(cm.records), 1) - self.assertEqual(cm.records[0].msg, "No inputs columns.") - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'species', - ]) - self.assertEqual(predictions.shape, (150, 2)) - - self.assertEqual(predictions.metadata.to_internal_json_structure(), no_attributes_dataframe.metadata.to_internal_json_structure()) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_ravel.py b/common-primitives/tests/test_ravel.py deleted file mode 100644 index 33d11ac..0000000 --- a/common-primitives/tests/test_ravel.py +++ /dev/null @@ -1,125 +0,0 @@ -import unittest - -from d3m import container, utils - -from common_primitives import ravel - - -class RavelAsRowPrimitiveTestCase(unittest.TestCase): - def _get_data(self): - data = container.DataFrame({ - 'a': [1, 2, 3], - 'b': [container.ndarray([2, 3, 4]), container.ndarray([5, 6, 7]), container.ndarray([8, 9, 10])] - }, { - 'top_level': 'foobar1', - }, generate_metadata=True) - - data.metadata = data.metadata.update_column(1, { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - }) - - return data - - def test_basic(self): - dataframe = container.DataFrame({ - 'a': [1, 2, 3], - 'b': ['a', 'b', 'c'] - }, { - 'top_level': 'foobar1', - }, generate_metadata=True) - - self.assertEqual(dataframe.shape, (3, 2)) - - for row_index in range(len(dataframe)): - for column_index in range(len(dataframe.columns)): - dataframe.metadata = dataframe.metadata.update((row_index, column_index), { - 'location': (row_index, column_index), - }) - - dataframe.metadata.check(dataframe) - - hyperparams = ravel.RavelAsRowPrimitive.metadata.get_hyperparams() - primitive = ravel.RavelAsRowPrimitive(hyperparams=hyperparams.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - self.assertEqual(dataframe.shape, (1, 6)) - - self.assertEqual(dataframe.values.tolist(), [[1, 'a', 2, 'b', 3, 'c']]) - self.assertEqual(list(dataframe.columns), ['a', 'b', 'a', 'b', 'a', 'b']) - - self.assertEqual(utils.to_json_structure(dataframe.metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'dimension': { - 'length': 1, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame', - 'top_level': 'foobar1', - }, - }, - { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 6, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - }, - }, - { - 'selector': [0, 0], - 'metadata': { - 'location': [0, 0], - 'name': 'a', - 'structural_type': 'numpy.int64', - }, - }, - { - 'selector': [0, 1], - 'metadata': { - 'location': [0, 1], - 'name': 'b', - 'structural_type': 'str', - }, - }, - { - 'selector': [0, 2], - 'metadata': { - 'location': [1, 0], - 'name': 'a', - 'structural_type': 'numpy.int64', - }, - }, - { - 'selector': [0, 3], - 'metadata': { - 'location': [1, 1], - 'name': 'b', - 
'structural_type': 'str', - }, - }, - { - 'selector': [0, 4], - 'metadata': { - 'location': [2, 0], - 'name': 'a', - 'structural_type': 'numpy.int64', - }, - }, - { - 'selector': [0, 5], - 'metadata': { - 'location': [2, 1], - 'name': 'b', - 'structural_type': 'str', - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_redact_columns.py b/common-primitives/tests/test_redact_columns.py deleted file mode 100644 index 5bd5df0..0000000 --- a/common-primitives/tests/test_redact_columns.py +++ /dev/null @@ -1,173 +0,0 @@ -import os -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import redact_columns - - -class RedactColumnsPrimitiveTestCase(unittest.TestCase): - def _get_datasets(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - datasets = container.List([dataset], { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': container.List, - 'dimension': { - 'length': 1, - }, - }, generate_metadata=False) - - # We update metadata based on metadata of each dataset. - # TODO: In the future this might be done automatically by generate_metadata. 
- # See: https://gitlab.com/datadrivendiscovery/d3m/issues/119 - for index, dataset in enumerate(datasets): - datasets.metadata = dataset.metadata.copy_to(datasets.metadata, (), (index,)) - - return dataset_doc_path, datasets - - def test_basic(self): - dataset_doc_path, datasets = self._get_datasets() - - hyperparams_class = redact_columns.RedactColumnsPrimitive.metadata.get_hyperparams() - - primitive = redact_columns.RedactColumnsPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'semantic_types': ('https://metadata.datadrivendiscovery.org/types/TrueTarget',), - 'add_semantic_types': ('https://metadata.datadrivendiscovery.org/types/RedactedTarget', 'https://metadata.datadrivendiscovery.org/types/MissingData'), - })) - redacted_datasets = primitive.produce(inputs=datasets).value - - self.assertEqual(len(redacted_datasets), 1) - - redacted_dataset = redacted_datasets[0] - - self.assertIsInstance(redacted_dataset, container.Dataset) - self.assertEqual(redacted_dataset['learningData']['species'].values.tolist(), [''] * 150) - - self._test_metadata(redacted_datasets.metadata, dataset_doc_path, True) - self._test_metadata(redacted_dataset.metadata, dataset_doc_path, False) - - def _test_metadata(self, metadata, dataset_doc_path, is_list): - top_metadata = { - 'structural_type': 'd3m.container.dataset.Dataset', - 'id': 'iris_dataset_1', - 'version': '4.0.0', - 'name': 'Iris Dataset', - 'location_uris': [ - 'file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path), - ], - 'dimension': { - 'name': 'resources', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/DatasetResource'], - 'length': 1, - }, - 'digest': '49404bf166238fbdac2b6d6baa899a0d1bf8ed5976525fa7353fd732ac218a85', - 'source': { - 'license': 'CC', - 'redacted': False, - 'human_subjects_research': False, - }, - } - - if is_list: - prefix = [0] - list_metadata = [{ - 'selector': [], - 'metadata': { - 'dimension': { - 'length': 1, - }, - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.list.List', - }, - }] - else: - prefix = [] - list_metadata = [] - top_metadata['schema'] = metadata_base.CONTAINER_SCHEMA_VERSION - - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), list_metadata + [{ - 'selector': prefix + [], - 'metadata': top_metadata, - }, { - 'selector': prefix + ['learningData'], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table', 'https://metadata.datadrivendiscovery.org/types/DatasetEntryPoint'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - }, - }, { - 'selector': prefix + ['learningData', '__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 6, - }, - }, - }, { - 'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'str', - 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'sepalLength', - 'structural_type': 'str', - 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - }, - }, { - 'selector': prefix + ['learningData', 
'__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'sepalWidth', - 'structural_type': 'str', - 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - }, - }, { - 'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 3], - 'metadata': { - 'name': 'petalLength', - 'structural_type': 'str', - 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - }, - }, { - 'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 4], - 'metadata': { - 'name': 'petalWidth', - 'structural_type': 'str', - 'semantic_types': ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - }, - }, { - 'selector': prefix + ['learningData', '__ALL_ELEMENTS__', 5], - 'metadata': { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget', - 'https://metadata.datadrivendiscovery.org/types/RedactedTarget', - 'https://metadata.datadrivendiscovery.org/types/MissingData', - ], - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_regex_filter.py b/common-primitives/tests/test_regex_filter.py deleted file mode 100644 index 42e0d71..0000000 --- a/common-primitives/tests/test_regex_filter.py +++ /dev/null @@ -1,114 +0,0 @@ -import unittest -import os - -from common_primitives import regex_filter -from d3m import container, exceptions - -import utils as test_utils - - -class RegexFilterPrimitiveTestCase(unittest.TestCase): - def test_inclusive(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = regex_filter.RegexFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'inclusive': True, - 'regex': 'AAA' - }) - - filter_primitive = regex_filter.RegexFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - matches = new_df[new_df['code'].str.match('AAA')] - self.assertTrue(matches['code'].unique() == ['AAA']) - - def test_exclusive(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = regex_filter.RegexFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'inclusive': False, - 'regex': 'AAA' - }) - - filter_primitive = regex_filter.RegexFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - matches = new_df[~new_df['code'].str.match('AAA')] - self.assertTrue(set(matches['code'].unique()) == set(['BBB', 'CCC'])) - - def test_numeric(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - 
resource = test_utils.get_dataframe(dataset) - - # set dataframe type to int to match output of a prior parse columns step - resource.iloc[:,3] = resource.iloc[:,3].astype(int) - - filter_hyperparams_class = regex_filter.RegexFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 3, - 'inclusive': False, - 'regex': '1990' - }) - - filter_primitive = regex_filter.RegexFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - matches = new_df[~new_df['year'].astype(str).str.match('1990')] - self.assertTrue(set(matches['year'].unique()) == set([2000, 2010])) - - def test_row_metadata_removal(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # add metadata for rows 0 and 1 - dataset.metadata = dataset.metadata.update(('learningData', 1), {'a': 0}) - dataset.metadata = dataset.metadata.update(('learningData', 2), {'b': 1}) - - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = regex_filter.RegexFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'inclusive': False, - 'regex': 'AAA' - }) - - filter_primitive = regex_filter.RegexFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - # verify that the length is correct - self.assertEqual(len(new_df), new_df.metadata.query(())['dimension']['length']) - - # verify that the rows were re-indexed in the metadata - self.assertEqual(new_df.metadata.query((0,))['a'], 0) - self.assertEqual(new_df.metadata.query((1,))['b'], 1) - self.assertFalse('b' in new_df.metadata.query((2,))) - - def test_bad_regex(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = regex_filter.RegexFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'inclusive': True, - 'regex': '[' - }) - - filter_primitive = regex_filter.RegexFilterPrimitive(hyperparams=hp) - with self.assertRaises(exceptions.InvalidArgumentValueError): - filter_primitive.produce(inputs=resource) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_remove_duplicate_columns.py b/common-primitives/tests/test_remove_duplicate_columns.py deleted file mode 100644 index 3713751..0000000 --- a/common-primitives/tests/test_remove_duplicate_columns.py +++ /dev/null @@ -1,123 +0,0 @@ -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import remove_duplicate_columns - - -class RemoveDuplicateColumnsPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - main = container.DataFrame({'a1': [1, 2, 3], 'b1': [4, 5, 6], 'a2': [1, 2, 3], 'c1': [7, 8, 9], 'a3': [1, 2, 3], 'a1a': [1, 2, 3]}, { - 'top_level': 'main', - }, columns=['a1', 'b1', 'a2', 'c1', 'a3', 'a1a'], generate_metadata=True) - main.metadata = main.metadata.update_column(0, {'name': 'aaa111'}) - main.metadata = main.metadata.update_column(1, {'name': 'bbb111'}) - main.metadata = main.metadata.update_column(2, {'name': 'aaa222'}) - main.metadata = 
main.metadata.update_column(3, {'name': 'ccc111'}) - main.metadata = main.metadata.update_column(4, {'name': 'aaa333'}) - main.metadata = main.metadata.update_column(5, {'name': 'aaa111'}) - - self.assertEqual(utils.to_json_structure(main.metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'top_level': 'main', - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 6, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': {'structural_type': 'numpy.int64', 'name': 'aaa111'}, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': {'structural_type': 'numpy.int64', 'name': 'bbb111'}, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': {'structural_type': 'numpy.int64', 'name': 'aaa222'}, - }, { - 'selector': ['__ALL_ELEMENTS__', 3], - 'metadata': {'structural_type': 'numpy.int64', 'name': 'ccc111'}, - }, { - 'selector': ['__ALL_ELEMENTS__', 4], - 'metadata': {'structural_type': 'numpy.int64', 'name': 'aaa333'}, - }, { - 'selector': ['__ALL_ELEMENTS__', 5], - 'metadata': {'structural_type': 'numpy.int64', 'name': 'aaa111'}, - }]) - - hyperparams_class = remove_duplicate_columns.RemoveDuplicateColumnsPrimitive.metadata.get_hyperparams() - primitive = remove_duplicate_columns.RemoveDuplicateColumnsPrimitive(hyperparams=hyperparams_class.defaults()) - primitive.set_training_data(inputs=main) - primitive.fit() - new_main = primitive.produce(inputs=main).value - - self.assertEqual(new_main.values.tolist(), [ - [1, 4, 7], - [2, 5, 8], - [3, 6, 9], - ]) - - self.assertEqual(utils.to_json_structure(new_main.metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'top_level': 'main', - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'aaa111', - 'other_names': ['aaa222', 'aaa333'], - 'structural_type': 'numpy.int64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'bbb111', - 'structural_type': 'numpy.int64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'ccc111', - 'structural_type': 'numpy.int64', - }, - }]) - - params = primitive.get_params() - primitive.set_params(params=params) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_rename_duplicate_columns.py b/common-primitives/tests/test_rename_duplicate_columns.py deleted file mode 100644 index 90cc522..0000000 --- a/common-primitives/tests/test_rename_duplicate_columns.py +++ /dev/null @@ -1,136 +0,0 @@ -import os -import unittest - -import pandas as pd - -from 
d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, column_parser, rename_duplicate_columns - - -class RenameDuplicateColumnsPrimitiveTestCase(unittest.TestCase): - def _get_iris(self): - dataset_doc_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = \ - dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - dataframe = primitive.produce(inputs=dataset).value - - return dataframe - - def _get_iris_columns(self): - dataframe = self._get_iris() - # We set semantic types like runtime would. - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/Target') - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataframe.metadata = dataframe.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/Attribute') - - # Parsing. - hyperparams_class = \ - column_parser.ColumnParserPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - return dataframe - - def test_basic(self): - test_data_inputs = {'col1': [1.0, 2.0, 3.0], - 'col2': [4.0, 5.0, 6.0], - 'col3': [100, 200, 300]} - dataframe_inputs = container.DataFrame.from_dict(data=test_data_inputs) - test_data_inputs_dup = {'col1': [1.0, 2.0, 3.0], - 'col2': [4.0, 5.0, 6.0]} - dataframe_inputs_dup = container.DataFrame.from_dict(data=test_data_inputs_dup) - test_data_inputs_dup_2 = {'col1': [1.0, 2.0, 3.0], - 'col2': [4.0, 5.0, 6.0], - 'col3': [100, 200, 300]} - dataframe_inputs_dup_2 = container.DataFrame.from_dict(data=test_data_inputs_dup_2) - input = pd.concat([dataframe_inputs, dataframe_inputs_dup, dataframe_inputs_dup_2], axis=1) - - hyperparams_class = rename_duplicate_columns.RenameDuplicateColumnsPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - - primitive = rename_duplicate_columns.RenameDuplicateColumnsPrimitive(hyperparams=hyperparams_class.defaults()) - - call_result = primitive.produce(inputs=input) - dataframe_renamed = call_result.value - self.assertEqual(dataframe_renamed.columns.values.tolist(), - ['col1', 'col2', 'col3', 'col1.1', 'col2.1', 'col1.2', 'col2.2', 'col3.1']) - - def test_monotonic_dup_col_name(self): - """This test is added because of issue #73""" - test_data_inputs = {'a': [1.0, 2.0, 3.0], - 'b': [100, 200, 300]} - dataframe_inputs = container.DataFrame.from_dict(data=test_data_inputs) - test_data_inputs_dup = {'b': [1.0, 2.0, 3.0], - 'c': [4.0, 5.0, 6.0]} - dataframe_inputs_dup = container.DataFrame.from_dict(data=test_data_inputs_dup) - input = pd.concat([dataframe_inputs, dataframe_inputs_dup], axis=1) - - hyperparams_class = rename_duplicate_columns.RenameDuplicateColumnsPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - - primitive = 
rename_duplicate_columns.RenameDuplicateColumnsPrimitive(hyperparams=hyperparams_class.defaults()) - - call_result = primitive.produce(inputs=input) - dataframe_renamed = call_result.value - self.assertEqual(dataframe_renamed.columns.values.tolist(), - ['a', 'b', 'b.1', 'c']) - - def test_no_change(self): - test_data_inputs = {'col0': [1.0, 2.0, 3.0], - 'col1': [4.0, 5.0, 6.0], - 'col2': [100, 200, 300]} - dataframe_inputs = container.DataFrame.from_dict(data=test_data_inputs) - test_data_inputs = {'col3': [1.0, 2.0, 3.0], - 'col4': [4.0, 5.0, 6.0], - 'col5': [100, 200, 300]} - dataframe_inputs_2 = container.DataFrame.from_dict(data=test_data_inputs) - - inputs = pd.concat([dataframe_inputs, dataframe_inputs_2], axis=1) - hyperparams_class = rename_duplicate_columns.RenameDuplicateColumnsPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - - primitive = rename_duplicate_columns.RenameDuplicateColumnsPrimitive(hyperparams=hyperparams_class.defaults()) - - call_result = primitive.produce(inputs=inputs) - dataframe_renamed = call_result.value - - self.assertEqual(dataframe_renamed.columns.values.tolist(), - ['col0', 'col1', 'col2', 'col3', 'col4', 'col5']) - - def test_iris_with_metadata(self): - dataframe = self._get_iris_columns() - dataframe_1 = self._get_iris_columns() - dataframe_concated = dataframe.append_columns(dataframe_1) - dataframe_concated_bk = dataframe_concated.copy() - hyperparams_class = rename_duplicate_columns.RenameDuplicateColumnsPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - - primitive = rename_duplicate_columns.RenameDuplicateColumnsPrimitive(hyperparams=hyperparams_class.defaults()) - - call_result = primitive.produce(inputs=dataframe_concated) - dataframe_renamed = call_result.value - names = ['d3mIndex', 'sepalLength', 'sepalWidth', 'petalLength', 'petalWidth', 'species', - 'd3mIndex.1', 'sepalLength.1', 'sepalWidth.1', 'petalLength.1', 'petalWidth.1', - 'species.1'] - self.assertEqual(dataframe_renamed.columns.values.tolist(), names) - self.assertTrue(dataframe_concated.equals(dataframe_concated_bk)) - self.assertEqual(dataframe_concated.metadata.to_internal_json_structure(), - dataframe_concated_bk.metadata.to_internal_json_structure()) - - for i, column_name in enumerate(dataframe_renamed.columns): - self.assertEqual(dataframe_renamed.metadata.query_column(i)['other_name'], - column_name.split(primitive.hyperparams['separator'])[0]) - self.assertEqual(dataframe_renamed.metadata.query_column(i)['name'], names[i]) diff --git a/common-primitives/tests/test_replace_semantic_types.py b/common-primitives/tests/test_replace_semantic_types.py deleted file mode 100644 index 258167a..0000000 --- a/common-primitives/tests/test_replace_semantic_types.py +++ /dev/null @@ -1,97 +0,0 @@ -import os -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, replace_semantic_types - -import utils as test_utils - - -class ReplaceSemanticTypesPrimitiveTestCase(unittest.TestCase): - def _get_iris_dataframe(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = 
dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - call_metadata = primitive.produce(inputs=dataset) - - dataframe = call_metadata.value - - return dataframe - - def test_basic(self): - dataframe = self._get_iris_dataframe() - - hyperparams_class = replace_semantic_types.ReplaceSemanticTypesPrimitive.metadata.get_hyperparams() - primitive = replace_semantic_types.ReplaceSemanticTypesPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'from_semantic_types': ('https://metadata.datadrivendiscovery.org/types/SuggestedTarget',), - 'to_semantic_types': ('https://metadata.datadrivendiscovery.org/types/Attribute',), - })) - - outputs = primitive.produce(inputs=dataframe).value - - self._test_metadata(outputs.metadata) - - def _test_metadata(self, metadata): - self.maxDiff = None - - self.assertEqual(test_utils.convert_through_json(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 6, - } - }) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - for i in range(1, 5): - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, i))), { - 'name': ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'][i - 1], - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }, i) - - self.assertEqual(test_utils.convert_through_json(metadata.query((metadata_base.ALL_ELEMENTS, 5))), { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - self.assertTrue(metadata.get_elements((metadata_base.ALL_ELEMENTS,)) in [[0, 1, 2, 3, 4, 5], [metadata_base.ALL_ELEMENTS, 0, 1, 2, 3, 4, 5]]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_simple_profiler.py b/common-primitives/tests/test_simple_profiler.py deleted file mode 100644 index b9a6706..0000000 --- a/common-primitives/tests/test_simple_profiler.py +++ /dev/null @@ -1,446 +0,0 @@ -import os.path -import pickle -import unittest - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, simple_profiler, train_score_split - - -class SimpleProfilerPrimitiveTestCase(unittest.TestCase): - def _get_iris(self, set_target_as_categorical): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - original_metadata = dataset.metadata - - # We make a very empty metadata. 
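# `DataMetadata().generate()` on the next line rebuilds only structural
# metadata (dimensions and structural types) from the container's contents,
# which is why every semantic type is then re-attached explicitly below. A
# general d3m idiom (not part of the original test) for inspecting what a
# column carries at any point:
#
#     column_types = dataset.metadata.query(('learningData', metadata_base.ALL_ELEMENTS, 0)).get('semantic_types', ())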
- dataset.metadata = metadata_base.DataMetadata().generate(dataset) - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'http://schema.org/Integer') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 0), 'https://metadata.datadrivendiscovery.org/types/PrimaryKey') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget') - - if set_target_as_categorical: - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/CategoricalData') - else: - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/UnknownType') - - return dataset, original_metadata - - def _test_metadata(self, original_metadata, dataframe_metadata, set_target_as_categorical): - for column_index in range(5): - self.assertCountEqual(original_metadata.query_column_field(column_index, 'semantic_types', at=('learningData',)), dataframe_metadata.query_column_field(column_index, 'semantic_types'), (set_target_as_categorical, column_index)) - - self.assertEqual(dataframe_metadata.query_column_field(5, 'semantic_types'), ( - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget', - ), set_target_as_categorical) - - def test_basic(self): - for set_target_as_categorical in [False, True]: - dataset, original_metadata = self._get_iris(set_target_as_categorical) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - dataframe = primitive.produce(inputs=dataset).value - - hyperparams_class = simple_profiler.SimpleProfilerPrimitive.metadata.get_hyperparams() - - primitive = simple_profiler.SimpleProfilerPrimitive(hyperparams=hyperparams_class.defaults()) - - primitive.set_training_data(inputs=dataframe) - primitive.fit() - - primitive_pickled = pickle.dumps(primitive) - primitive = pickle.loads(primitive_pickled) - - dataframe = primitive.produce(inputs=dataframe).value - - self._test_metadata(original_metadata, dataframe.metadata, set_target_as_categorical) - - def test_small_test(self): - for set_target_as_categorical in [False, True]: - dataset, original_metadata = self._get_iris(set_target_as_categorical) - - hyperparams_class = train_score_split.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams() - - primitive = train_score_split.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'train_score_ratio': 0.9, - 'shuffle': True, - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value - - self.assertEqual(len(results), 1) - - train_dataset = results[0] - - self.assertEqual(len(train_dataset['learningData']), 135) - - results = primitive.produce_score_data(inputs=container.List([0], 
generate_metadata=True)).value - - self.assertEqual(len(results), 1) - - score_dataset = results[0] - - self.assertEqual(len(score_dataset['learningData']), 15) - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - train_dataframe = primitive.produce(inputs=train_dataset).value - - score_dataframe = primitive.produce(inputs=score_dataset).value - - hyperparams_class = simple_profiler.SimpleProfilerPrimitive.metadata.get_hyperparams() - - primitive = simple_profiler.SimpleProfilerPrimitive(hyperparams=hyperparams_class.defaults()) - - primitive.set_training_data(inputs=train_dataframe) - primitive.fit() - dataframe = primitive.produce(inputs=score_dataframe).value - - self._test_metadata(original_metadata, dataframe.metadata, set_target_as_categorical) - - def _get_column_semantic_types(self, dataframe): - number_of_columns = dataframe.metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length'] - generated_semantic_types = [ - dataframe.metadata.query((metadata_base.ALL_ELEMENTS, i))['semantic_types'] - for i in range(number_of_columns) - ] - generated_semantic_types = [sorted(x) for x in generated_semantic_types] - - return generated_semantic_types - - def test_iris_csv(self): - dataset_doc_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'tables', 'learningData.csv') - ) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # Use profiler to assign semantic types - dataframe = self._profile_dataset(dataset=dataset) - - generated_semantic_types = self._get_column_semantic_types(dataframe) - - semantic_types = [ - [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - [ - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - ], - ] - - self.assertEqual(generated_semantic_types, semantic_types) - - def _profile_dataset(self, dataset, hyperparams=None): - if hyperparams is None: - hyperparams = {} - - hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataset).value - - hyperparams_class = simple_profiler.SimpleProfilerPrimitive.metadata.get_hyperparams() - primitive = simple_profiler.SimpleProfilerPrimitive(hyperparams=hyperparams_class.defaults().replace(hyperparams)) - primitive.set_training_data(inputs=dataframe) - primitive.fit() - - return primitive.produce(inputs=dataframe).value - - def test_boston(self): - dataset = container.dataset.Dataset.load('sklearn://boston') - - # Use profiler to assign semantic types - dataframe = self._profile_dataset(dataset=dataset) - - generated_semantic_types = self._get_column_semantic_types(dataframe) - - semantic_types = [ - ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], 
- ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Boolean', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - [ - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - ], - ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget', - ], - ] - - self.assertEqual(generated_semantic_types, semantic_types) - - def test_diabetes(self): - dataset = container.dataset.Dataset.load('sklearn://diabetes') - - # Use profiler to assign semantic types - dataframe = self._profile_dataset(dataset=dataset) - - generated_semantic_types = self._get_column_semantic_types(dataframe) - - semantic_types = [ - ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Float', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget', - ], - ] - - self.assertEqual(generated_semantic_types, semantic_types) - - def test_digits(self): - self.maxDiff = None - - dataset = container.dataset.Dataset.load('sklearn://digits') - - detect_semantic_types = list(simple_profiler.SimpleProfilerPrimitive.metadata.get_hyperparams().configuration['detect_semantic_types'].get_default()) - # Some pixels have very little different values. 
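# To make the comment above concrete with a toy illustration (hypothetical
# values, not taken from the digits data): a pixel column with only two
# observed intensities would otherwise be profiled as http://schema.org/Boolean
# rather than as an integer attribute.
#
#     toy_pixel_column = [0, 16, 16, 0]
#     assert len(set(toy_pixel_column)) == 2  # two distinct values look "boolean"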
- detect_semantic_types.remove('http://schema.org/Boolean') - # There are just 16 colors, but we want to see them as integers. - detect_semantic_types.remove('https://metadata.datadrivendiscovery.org/types/CategoricalData') - - # Use profiler to assign semantic types - dataframe = self._profile_dataset(dataset=dataset, hyperparams={ - 'detect_semantic_types': detect_semantic_types, - }) - - generated_semantic_types = self._get_column_semantic_types(dataframe) - - semantic_types = ( - [['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']] - + 64 - * [ - [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ] - ] - + [ - [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget', - ] - ] - ) - - self.assertEqual(generated_semantic_types, semantic_types) - - def test_iris(self): - dataset = container.dataset.Dataset.load('sklearn://iris') - - # Use profiler to assign semantic types - dataframe = self._profile_dataset(dataset=dataset) - - generated_semantic_types = self._get_column_semantic_types(dataframe) - - semantic_types = [ - ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget', - ], - ] - - self.assertEqual(generated_semantic_types, semantic_types) - - def test_breast_cancer(self): - dataset = container.dataset.Dataset.load('sklearn://breast_cancer') - - # Use profiler to assign semantic types - dataframe = self._profile_dataset(dataset=dataset) - - generated_semantic_types = self._get_column_semantic_types(dataframe) - - semantic_types = ( - [['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey']] - + 30 - * [ - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ] - ] - + [ - [ - 'http://schema.org/Boolean', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget', - ] - ] - ) - - self.assertEqual(generated_semantic_types, semantic_types) - - def test_linnerud(self): - dataset = container.dataset.Dataset.load('sklearn://linnerud') - - # Use profiler to assign semantic types - dataframe = self._profile_dataset(dataset=dataset) - - generated_semantic_types = self._get_column_semantic_types(dataframe) - - semantic_types = [ - ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 
['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/Attribute'], - [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - # Only the first "SuggestedTarget" column is made into a target. - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget', - ], - [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - ], - [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - ], - ] - - self.assertEqual(generated_semantic_types, semantic_types) - - def test_wine(self): - dataset = container.dataset.Dataset.load('sklearn://wine') - - # Use profiler to assign semantic types - dataframe = self._profile_dataset(dataset=dataset) - - generated_semantic_types = self._get_column_semantic_types(dataframe) - - semantic_types = [ - ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget', - ], - ] - - self.assertEqual(generated_semantic_types, semantic_types) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_stack_ndarray_column.py b/common-primitives/tests/test_stack_ndarray_column.py deleted file mode 100644 index d6b3b1d..0000000 --- a/common-primitives/tests/test_stack_ndarray_column.py +++ /dev/null @@ -1,77 +0,0 @@ -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import stack_ndarray_column - - -class StackNDArrayColumnPrimitiveTestCase(unittest.TestCase): - def _get_data(self): - data = container.DataFrame({ - 'a': [1, 2, 3], - 'b': [container.ndarray([2, 3, 4]), container.ndarray([5, 6, 7]), container.ndarray([8, 9, 10])] - }, { - 'top_level': 'foobar1', - }, 
generate_metadata=True) - - data.metadata = data.metadata.update_column(1, { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - }) - - return data - - def test_basic(self): - data = self._get_data() - - data_metadata_before = data.metadata.to_internal_json_structure() - - stack_hyperparams_class = stack_ndarray_column.StackNDArrayColumnPrimitive.metadata.get_hyperparams() - stack_primitive = stack_ndarray_column.StackNDArrayColumnPrimitive(hyperparams=stack_hyperparams_class.defaults()) - stack_array = stack_primitive.produce(inputs=data).value - - self.assertEqual(stack_array.shape, (3, 3)) - - self._test_metadata(stack_array.metadata) - - self.assertEqual(data.metadata.to_internal_json_structure(), data_metadata_before) - - def _test_metadata(self, metadata): - self.maxDiff = None - - self.assertEqual(utils.to_json_structure(metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'top_level': 'foobar1', - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.numpy.ndarray', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 3, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - # It is unclear if name and semantic types should be moved to rows, but this is what currently happens. - 'name': 'b', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': '__NO_VALUE__', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', '__ALL_ELEMENTS__'], - 'metadata': { - 'structural_type': 'numpy.int64', - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_tabular_extractor.py b/common-primitives/tests/test_tabular_extractor.py deleted file mode 100644 index 29b2905..0000000 --- a/common-primitives/tests/test_tabular_extractor.py +++ /dev/null @@ -1,173 +0,0 @@ -import os -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, column_parser, tabular_extractor - -import utils as test_utils - - -class TabularExtractorPrimitiveTestCase(unittest.TestCase): - def setUp(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We mark targets as attributes. 
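# The reassignment idiom used below is required because DataMetadata is
# immutable: each add_/remove_semantic_type call returns a new metadata object
# and leaves the original untouched. The general shape of the idiom, with a
# hypothetical SOME_TYPE placeholder:
#
#     selector = ('learningData', metadata_base.ALL_ELEMENTS, 5)
#     dataset.metadata = dataset.metadata.remove_semantic_type(selector, SOME_TYPE)
#     dataset.metadata = dataset.metadata.add_semantic_type(selector, SOME_TYPE)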
- dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - self.dataset = dataset - - # DatasetToDataFramePrimitive - - df_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - - df_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=df_hyperparams_class.defaults()) - - df_dataframe = df_primitive.produce(inputs=self.dataset).value - - # Set some missing values. - df_dataframe.iloc[1, 1] = "" - df_dataframe.iloc[10, 1] = "" - df_dataframe.iloc[15, 1] = "" - - # ColumnParserPrimitive - - cp_hyperparams_class = column_parser.ColumnParserPrimitive.metadata.get_hyperparams() - - # To simulate how Pandas "read_csv" is reading CSV files, we parse just numbers. - cp_primitive = column_parser.ColumnParserPrimitive( - hyperparams=cp_hyperparams_class.defaults().replace({ - 'parse_semantic_types': ['http://schema.org/Integer', 'http://schema.org/Float'], - }), - ) - - self.dataframe = cp_primitive.produce(inputs=df_dataframe).value - - def test_defaults(self): - te_hyperparams_class = tabular_extractor.AnnotatedTabularExtractorPrimitive.metadata.get_hyperparams() - - # It one-hot encodes categorical columns, it imputes numerical values, - # and adds missing indicator column for each. - te_primitive = tabular_extractor.AnnotatedTabularExtractorPrimitive( - hyperparams=te_hyperparams_class.defaults(), - ) - - te_primitive.set_training_data(inputs=self.dataframe) - te_primitive.fit() - - dataframe = te_primitive.produce(inputs=self.dataframe).value - - # 1 index column, 4 numerical columns with one indicator column each, - # 3 columns for one-hot encoding of "target" column and indicator column for that. 
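# Spelled out, the expected width asserted on the next line is: 1 d3mIndex
# column + 4 numerical columns + 4 missing-value indicators + 3 one-hot
# columns for the categorical column + 1 indicator for it:
#
#     expected_width = 1 + 4 + 4 + 3 + 1
#     assert expected_width == 13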
- self.assertEqual(dataframe.shape, (150, 13)) - - self.assertEqual(test_utils.convert_through_json(utils.to_json_structure(dataframe.metadata.to_internal_simple_structure())), [{ - 'selector': [], - 'metadata': { - 'dimension': { - 'length': 150, - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'structural_type': 'd3m.container.pandas.DataFrame', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'length': 13, - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'semantic_types': ['http://schema.org/Integer', 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - 'structural_type': 'int', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 3], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 4], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 5], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 6], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 7], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 8], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 9], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 10], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 11], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 12], - 'metadata': { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'], - 'structural_type': 'numpy.float64', - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_term_filter.py b/common-primitives/tests/test_term_filter.py deleted file mode 100644 index 5131238..0000000 --- a/common-primitives/tests/test_term_filter.py +++ /dev/null @@ -1,136 +0,0 @@ 
-import unittest -import os - -from common_primitives import term_filter -from d3m import container - -import utils as test_utils - - -class TermFilterPrimitiveTestCase(unittest.TestCase): - def test_inclusive(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = term_filter.TermFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'inclusive': True, - 'terms': ['AAA', 'CCC'], - 'match_whole': True - }) - - filter_primitive = term_filter.TermFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - self.assertTrue(set(new_df['code'].unique()) == set(['AAA', 'CCC'])) - - def test_exclusive(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = term_filter.TermFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'inclusive': False, - 'terms': ['AAA', 'CCC'], - 'match_whole': True - }) - - filter_primitive = term_filter.TermFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - self.assertTrue(set(new_df['code'].unique()) == set(['BBB'])) - - def test_numeric(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - # set dataframe type to int to match output of a prior parse columns step - resource.iloc[:,3] = resource.iloc[:,3].astype(int) - - filter_hyperparams_class = term_filter.TermFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 3, - 'inclusive': False, - 'terms': ['1990'], - 'match_whole': True - }) - - filter_primitive = term_filter.TermFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - matches = new_df[~new_df['year'].astype(str).str.match('1990')] - self.assertTrue(set(matches['year'].unique()) == set([2000, 2010])) - - def test_partial_no_match(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = term_filter.TermFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'inclusive': True, - 'terms': ['AA', 'CC'], - 'match_whole': False - }) - - filter_primitive = term_filter.TermFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - self.assertTrue(set(new_df['code'].unique()) == set(['AAA', 'CCC'])) - - def test_escaped_regex(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = 
container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = term_filter.TermFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 4, - 'inclusive': True, - 'terms': ['40.2'], - 'match_whole': False - }) - - filter_primitive = term_filter.TermFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - self.assertListEqual(list(new_df['value']), ['40.2346487255306']) - - def test_row_metadata_removal(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'database_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # add metadata for rows 0 and 1 - dataset.metadata = dataset.metadata.update(('learningData', 1), {'a': 0}) - dataset.metadata = dataset.metadata.update(('learningData', 2), {'b': 1}) - - resource = test_utils.get_dataframe(dataset) - - filter_hyperparams_class = term_filter.TermFilterPrimitive.metadata.get_hyperparams() - hp = filter_hyperparams_class({ - 'column': 1, - 'inclusive': False, - 'terms': ['AAA'], - 'match_whole': True - }) - - filter_primitive = term_filter.TermFilterPrimitive(hyperparams=hp) - new_df = filter_primitive.produce(inputs=resource).value - - # verify that the length is correct - self.assertEqual(len(new_df), new_df.metadata.query(())['dimension']['length']) - - # verify that the rows were re-indexed in the metadata - self.assertEqual(new_df.metadata.query((0,))['a'], 0) - self.assertEqual(new_df.metadata.query((1,))['b'], 1) - self.assertFalse('b' in new_df.metadata.query((2,))) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_text_reader.py b/common-primitives/tests/test_text_reader.py deleted file mode 100644 index 00335be..0000000 --- a/common-primitives/tests/test_text_reader.py +++ /dev/null @@ -1,30 +0,0 @@ -import unittest -import os - -from d3m import container - -from common_primitives import dataset_to_dataframe, text_reader - - -class TextReaderPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'text_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults().replace({'dataframe_resource': '0'})) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - text_hyperparams_class = text_reader.TextReaderPrimitive.metadata.get_hyperparams() - text_primitive = text_reader.TextReaderPrimitive(hyperparams=text_hyperparams_class.defaults().replace({'return_result': 'replace'})) - tables = text_primitive.produce(inputs=dataframe).value - - self.assertEqual(tables.shape, (4, 1)) - - self.assertEqual(tables.metadata.query_column(0)['structural_type'], str) - self.assertEqual(tables.metadata.query_column(0)['semantic_types'], ('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'http://schema.org/Text')) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_train_score_split.py 
b/common-primitives/tests/test_train_score_split.py deleted file mode 100644 index 317367a..0000000 --- a/common-primitives/tests/test_train_score_split.py +++ /dev/null @@ -1,88 +0,0 @@ -import os -import pickle -import unittest - -from d3m import container -from d3m.metadata import base as metadata_base - -from common_primitives import train_score_split - - -class TrainScoreDatasetSplitPrimitiveTestCase(unittest.TestCase): - def test_produce_train(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = train_score_split.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams() - - primitive = train_score_split.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'shuffle': True, - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - # To test that pickling works. - pickle.dumps(primitive) - - results = primitive.produce(inputs=container.List([0], generate_metadata=True)).value - - self.assertEqual(len(results), 1) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(results[0]['learningData'].shape[0], 112) - self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [ - '0', '1', '2', '3', '4', '5', '6', '9', '10', '11', '12', '13', '14', '15', '17', '19', '20', - '21', '23', '25', '28', '29', '30', '31', '32', '34', '35', '36', '38', '39', '41', '42', '43', - '46', '47', '48', '49', '50', '52', '53', '55', '56', '57', '58', '60', '61', '64', '65', '67', - '68', '69', '70', '72', '74', '75', '77', '79', '80', '81', '82', '85', '87', '88', '89', '91', - '92', '94', '95', '96', '98', '99', '101', '102', '103', '104', '105', '106', '108', '109', '110', - '111', '112', '113', '115', '116', '117', '118', '119', '120', '122', '123', '124', '125', '128', - '129', '130', '131', '133', '135', '136', '138', '139', '140', '141', '142', '143', '144', '145', - '146', '147', '148', '149', - ]) - - def test_produce_score(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'tests', 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - # We set semantic types like runtime would. 
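# For context on the row counts asserted in these two tests (112 train rows,
# 38 score rows out of 150): assuming the primitive's default train_score_ratio
# is 0.75, which neither test overrides, the split works out as:
#
#     train_rows = int(150 * 0.75)   # -> 112
#     score_rows = 150 - train_rows  # -> 38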
- dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Target') - dataset.metadata = dataset.metadata.add_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataset.metadata = dataset.metadata.remove_semantic_type(('learningData', metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - hyperparams_class = train_score_split.TrainScoreDatasetSplitPrimitive.metadata.get_hyperparams() - - primitive = train_score_split.TrainScoreDatasetSplitPrimitive(hyperparams=hyperparams_class.defaults().replace({ - 'shuffle': True, - })) - - primitive.set_training_data(dataset=dataset) - primitive.fit() - - results = primitive.produce_score_data(inputs=container.List([0], generate_metadata=True)).value - - self.assertEqual(len(results), 1) - - for dataset in results: - self.assertEqual(len(dataset), 1) - - self.assertEqual(results[0]['learningData'].shape[0], 38) - self.assertEqual(list(results[0]['learningData'].iloc[:, 0]), [ - '7', '8', '16', '18', '22', '24', '26', '27', '33', '37', '40', '44', '45', '51', '54', - '59', '62', '63', '66', '71', '73', '76', '78', '83', '84', '86', '90', '93', '97', '100', - '107', '114', '121', '126', '127', '132', '134', '137', - ]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_unseen_label_decoder.py b/common-primitives/tests/test_unseen_label_decoder.py deleted file mode 100644 index 108a5c6..0000000 --- a/common-primitives/tests/test_unseen_label_decoder.py +++ /dev/null @@ -1,51 +0,0 @@ -import unittest - -from d3m import container - -from common_primitives import unseen_label_encoder, unseen_label_decoder - - -class UnseenLabelEncoderTestCase(unittest.TestCase): - def test_basic(self): - encoder_hyperparams_class = unseen_label_encoder.UnseenLabelEncoderPrimitive.metadata.get_hyperparams() - encoder_primitive = unseen_label_encoder.UnseenLabelEncoderPrimitive(hyperparams=encoder_hyperparams_class.defaults()) - - inputs = container.DataFrame({ - 'value': [0.0, 1.0, 2.0, 3.0], - 'number': [0, 1, 2, 3], - 'word': ['one', 'two', 'three', 'four'], - }, generate_metadata=True) - inputs.metadata = inputs.metadata.update_column(2, { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData'], - }) - - encoder_primitive.set_training_data(inputs=inputs) - encoder_primitive.fit() - - inputs = container.DataFrame({ - 'value': [1.0, 2.0, 3.0], - 'number': [1, 2, 3], - 'word': ['one', 'two', 'five'], - }, generate_metadata=True) - inputs.metadata = inputs.metadata.update_column(2, { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData'], - }) - - outputs = encoder_primitive.produce(inputs=inputs).value - - decoder_hyperparams_class = unseen_label_decoder.UnseenLabelDecoderPrimitive.metadata.get_hyperparams() - decoder_primitive = unseen_label_decoder.UnseenLabelDecoderPrimitive(hyperparams=decoder_hyperparams_class.defaults().replace({'encoder': encoder_primitive})) - - decoded = decoder_primitive.produce(inputs=outputs).value - - self.assertEqual(decoded.values.tolist(), [ - [1, 1.0, 'one'], - [2, 2.0, 'two'], - [3, 3.0, ''], - ]) - - self.assertEqual(decoded.metadata.query_column(2)['structural_type'], str) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_unseen_label_encoder.py 
b/common-primitives/tests/test_unseen_label_encoder.py deleted file mode 100644 index 5057688..0000000 --- a/common-primitives/tests/test_unseen_label_encoder.py +++ /dev/null @@ -1,46 +0,0 @@ -import unittest - -from d3m import container - -from common_primitives import unseen_label_encoder - - -class UnseenLabelEncoderTestCase(unittest.TestCase): - def test_basic(self): - encoder_hyperparams_class = unseen_label_encoder.UnseenLabelEncoderPrimitive.metadata.get_hyperparams() - encoder_primitive = unseen_label_encoder.UnseenLabelEncoderPrimitive(hyperparams=encoder_hyperparams_class.defaults()) - - inputs = container.DataFrame({ - 'value': [0.0, 1.0, 2.0, 3.0], - 'number': [0, 1, 2, 3], - 'word': ['one', 'two', 'three', 'four'], - }, generate_metadata=True) - inputs.metadata = inputs.metadata.update_column(2, { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData'], - }) - - encoder_primitive.set_training_data(inputs=inputs) - encoder_primitive.fit() - - inputs = container.DataFrame({ - 'value': [1.0, 2.0, 3.0], - 'number': [1, 2, 3], - 'word': ['one', 'two', 'five'], - }, generate_metadata=True) - inputs.metadata = inputs.metadata.update_column(2, { - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData'], - }) - - outputs = encoder_primitive.produce(inputs=inputs).value - - self.assertEqual(outputs.values.tolist(), [ - [1, 1.0, 1], - [2, 2.0, 2], - [3, 3.0, 0], - ]) - - self.assertEqual(outputs.metadata.query_column(2)['structural_type'], int) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_video_reader.py b/common-primitives/tests/test_video_reader.py deleted file mode 100644 index 4ae2f72..0000000 --- a/common-primitives/tests/test_video_reader.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest -import os - -from d3m import container - -from common_primitives import dataset_to_dataframe, video_reader - - -class VideoReaderPrimitiveTestCase(unittest.TestCase): - def test_basic(self): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'video_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - dataframe_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataframe_hyperparams_class.defaults().replace({'dataframe_resource': '0'})) - dataframe = dataframe_primitive.produce(inputs=dataset).value - - video_hyperparams_class = video_reader.VideoReaderPrimitive.metadata.get_hyperparams() - video_primitive = video_reader.VideoReaderPrimitive(hyperparams=video_hyperparams_class.defaults().replace({'return_result': 'replace'})) - videos = video_primitive.produce(inputs=dataframe).value - - self.assertEqual(videos.shape, (2, 1)) - self.assertEqual(videos.iloc[0, 0].shape, (408, 240, 320, 3)) - self.assertEqual(videos.iloc[1, 0].shape, (79, 240, 320, 3)) - - self._test_metadata(videos.metadata) - - def _test_metadata(self, metadata): - self.assertEqual(metadata.query_column(0)['structural_type'], container.ndarray) - self.assertEqual(metadata.query_column(0)['semantic_types'], ('https://metadata.datadrivendiscovery.org/types/PrimaryKey', 'http://schema.org/VideoObject')) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_xgboost_dart.py b/common-primitives/tests/test_xgboost_dart.py deleted 
file mode 100644 index a2928f4..0000000 --- a/common-primitives/tests/test_xgboost_dart.py +++ /dev/null @@ -1,687 +0,0 @@ -import os -import pickle -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, extract_columns_semantic_types, xgboost_dart, column_parser - - -class XGBoostDartTestCase(unittest.TestCase): - def _get_iris(self): - dataset_doc_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = \ - dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - dataframe = primitive.produce(inputs=dataset).value - - return dataframe - - def _get_iris_columns(self): - dataframe = self._get_iris() - - # We set custom metadata on columns. - for column_index in range(1, 5): - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'attributes'}) - for column_index in range(5, 6): - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'targets'}) - - # We set semantic types like runtime would. - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/Target') - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataframe.metadata = dataframe.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 5), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - # Parsing. 
- hyperparams_class = \ - column_parser.ColumnParserPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - hyperparams_class = \ - extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - {'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Attribute',)})) - attributes = primitive.produce(inputs=dataframe).value - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - {'semantic_types': ('https://metadata.datadrivendiscovery.org/types/SuggestedTarget',)})) - targets = primitive.produce(inputs=dataframe).value - - return dataframe, attributes, targets - - def test_single_target(self): - dataframe, attributes, targets = self._get_iris_columns() - - self.assertEqual(list(targets.columns), ['species']) - hyperparams_class = \ - xgboost_dart.XGBoostDartClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_dart.XGBoostDartClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - predictions = primitive.produce(inputs=attributes).value - self.assertEqual(list(predictions.columns), ['species']) - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - self._test_single_target_metadata(predictions.metadata) - - samples = primitive.sample(inputs=attributes).value - self.assertEqual(list(samples[0].columns), ['species']) - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=attributes, outputs=targets).value - self.assertEqual(list(log_likelihoods.columns), ['species']) - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=attributes, outputs=targets).value - self.assertEqual(list(log_likelihood.columns), ['species']) - - 
self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -2.414982318878174) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - def test_single_target_continue_fit(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_dart.XGBoostDartClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_dart.XGBoostDartClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - # reset the training data to make continue_fit() work. - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.continue_fit() - params = primitive.get_params() - self.assertEqual(params['booster'].best_ntree_limit, - primitive.hyperparams['n_estimators'] + primitive.hyperparams['n_more_estimators']) - predictions = primitive.produce(inputs=attributes).value - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - self._test_single_target_metadata(predictions.metadata) - - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=attributes, outputs=targets).value - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=attributes, outputs=targets).value - - self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - def _test_single_target_metadata(self, predictions_metadata): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 1, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 
0], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }] - - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), expected_metadata) - - def test_multiple_targets(self): - dataframe, attributes, targets = self._get_iris_columns() - - targets = targets.append_columns(targets) - - self.assertEqual(list(targets.columns), ['species', 'species']) - - hyperparams_class = \ - xgboost_dart.XGBoostDartClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_dart.XGBoostDartClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - predictions = primitive.produce(inputs=attributes).value - self.assertEqual(list(predictions.columns), ['species', 'species']) - self.assertEqual(predictions.shape, (150, 2)) - for column_index in range(2): - self.assertEqual(predictions.iloc[0, column_index], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(column_index)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(column_index)['custom_metadata'], 'targets') - - samples = primitive.sample(inputs=attributes).value - self.assertEqual(list(samples[0].columns), ['species', 'species']) - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 2)) - for column_index in range(2): - self.assertEqual(samples[0].iloc[0, column_index], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(column_index)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(column_index)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=attributes, outputs=targets).value - self.assertEqual(list(log_likelihoods.columns), ['species', 'species']) - - self.assertEqual(log_likelihoods.shape, (150, 2)) - for column_index in range(2): - self.assertEqual(log_likelihoods.metadata.query_column(column_index)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=attributes, outputs=targets).value - self.assertEqual(list(log_likelihood.columns), ['species', 'species']) - - self.assertEqual(log_likelihood.shape, (1, 2)) - for column_index in range(2): - self.assertAlmostEqual(log_likelihood.iloc[0, column_index], -2.414982318878174) - self.assertEqual(log_likelihoods.metadata.query_column(column_index)['name'], 'species') - - def 
test_multiple_targets_continue_fit(self): - dataframe, attributes, targets = self._get_iris_columns() - second_targets = targets.copy() - second_targets['species'] = targets['species'].map( - {'Iris-setosa': 't-Iris-setosa', 'Iris-versicolor': 't-Iris-versicolor', - 'Iris-virginica': 't-Iris-virginica'}) - second_targets.rename(columns={'species': 't-species'}, inplace=True) - second_targets.metadata = second_targets.metadata.update_column(0, {'name': 't-species'}) - targets = targets.append_columns(second_targets) - hyperparams_class = \ - xgboost_dart.XGBoostDartClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_dart.XGBoostDartClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.continue_fit() - params = primitive.get_params() - for estimator in params['estimators']: - self.assertEqual(estimator.get_booster().best_ntree_limit, - primitive.hyperparams['n_estimators'] + primitive.hyperparams['n_more_estimators']) - - predictions = primitive.produce(inputs=attributes).value - - self.assertEqual(predictions.shape, (150, 2)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - self.assertEqual(predictions.iloc[0, 1], 't-Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 't-species') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 2)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - self.assertEqual(samples[0].iloc[0, 1], 't-Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(1)['name'], 't-species') - 
self.assertEqual(samples[0].metadata.query_column(1)['custom_metadata'], 'targets') - log_likelihoods = primitive.log_likelihoods(inputs=attributes, outputs=targets).value - - self.assertEqual(log_likelihoods.shape, (150, 2)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - self.assertEqual(log_likelihoods.metadata.query_column(1)['name'], 't-species') - - log_likelihood = primitive.log_likelihood(inputs=attributes, outputs=targets).value - - self.assertEqual(log_likelihood.shape, (1, 2)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - self.assertEqual(log_likelihoods.metadata.query_column(1)['name'], 't-species') - - def test_semantic_types(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_dart.XGBoostDartClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_dart.XGBoostDartClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - self.assertEqual(list(predictions.columns), ['species']) - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - samples = primitive.sample(inputs=dataframe).value - self.assertEqual(list(samples[0].columns), ['species']) - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=dataframe, outputs=dataframe).value - self.assertEqual(list(log_likelihoods.columns), ['species']) - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=dataframe, outputs=dataframe).value - self.assertEqual(list(log_likelihood.columns), ['species']) - - self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -2.414982318878174) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - def test_return_append(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_dart.XGBoostDartClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = 
xgboost_dart.XGBoostDartClassifierPrimitive(hyperparams=hyperparams_class.defaults()) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'sepalLength', - 'sepalWidth', - 'petalLength', - 'petalWidth', - 'species', - 'species', - ]) - - self.assertEqual(predictions.shape, (150, 7)) - self.assertEqual(predictions.iloc[0, 6], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 6), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 6), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(6)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(6)['custom_metadata'], 'targets') - - self._test_return_append_metadata(predictions.metadata) - - def _test_return_append_metadata(self, predictions_metadata): - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 7, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'sepalLength', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'sepalWidth', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 3], - 'metadata': { - 'name': 'petalLength', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 4], - 'metadata': { - 'name': 'petalWidth', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 5], - 'metadata': { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget'], - 'custom_metadata': 'targets', - }, - }, { - 'selector': 
['__ALL_ELEMENTS__', 6], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }]) - - def test_return_new(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_dart.XGBoostDartClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_dart.XGBoostDartClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new'})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'species', - ]) - self.assertEqual(predictions.shape, (150, 2)) - self.assertEqual(predictions.iloc[0, 1], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - self._test_return_new_metadata(predictions.metadata) - - def _test_return_new_metadata(self, predictions_metadata): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 2, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }] - - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), expected_metadata) - - def test_return_replace(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_dart.XGBoostDartClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_dart.XGBoostDartClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace'})) - - 
primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'species', - 'species', - ]) - - self.assertEqual(predictions.shape, (150, 3)) - self.assertEqual(predictions.iloc[0, 1], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - self._test_return_replace_metadata(predictions.metadata) - - def test_pickle_unpickle(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_dart.XGBoostDartClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_dart.XGBoostDartClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - before_pickled_prediction = primitive.produce(inputs=attributes).value - pickle_object = pickle.dumps(primitive) - primitive = pickle.loads(pickle_object) - after_unpickled_prediction = primitive.produce(inputs=attributes).value - _ = pickle.dumps(primitive) - self.assertTrue(container.DataFrame.equals(before_pickled_prediction, after_unpickled_prediction)) - - def _test_return_replace_metadata(self, predictions_metadata): - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 
'https://metadata.datadrivendiscovery.org/types/TrueTarget'], - 'custom_metadata': 'targets', - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_xgboost_gbtree.py b/common-primitives/tests/test_xgboost_gbtree.py deleted file mode 100644 index 1ec0e67..0000000 --- a/common-primitives/tests/test_xgboost_gbtree.py +++ /dev/null @@ -1,733 +0,0 @@ -import os -import pickle -import unittest - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, extract_columns_semantic_types, xgboost_gbtree, column_parser - - -class XGBoostTestCase(unittest.TestCase): - def _get_iris(self): - dataset_doc_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = \ - dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - dataframe = primitive.produce(inputs=dataset).value - - return dataframe - - def _get_iris_columns(self): - dataframe = self._get_iris() - - # We set custom metadata on columns. - for column_index in range(1, 5): - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'attributes'}) - for column_index in range(5, 6): - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'targets'}) - - # We set semantic types like runtime would. - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/Target') - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataframe.metadata = dataframe.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, 5), - 'https://metadata.datadrivendiscovery.org/types/Attribute') - - # Parsing. 
- hyperparams_class = \ - column_parser.ColumnParserPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - hyperparams_class = \ - extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - {'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Attribute',)})) - attributes = primitive.produce(inputs=dataframe).value - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - {'semantic_types': ('https://metadata.datadrivendiscovery.org/types/SuggestedTarget',)})) - targets = primitive.produce(inputs=dataframe).value - - return dataframe, attributes, targets - - def test_single_target(self): - dataframe, attributes, targets = self._get_iris_columns() - - self.assertEqual(list(targets.columns), ['species']) - hyperparams_class = \ - xgboost_gbtree.XGBoostGBTreeClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_gbtree.XGBoostGBTreeClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - predictions = primitive.produce(inputs=attributes).value - self.assertEqual(list(predictions.columns), ['species']) - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - self._test_single_target_metadata(predictions.metadata) - - samples = primitive.sample(inputs=attributes).value - self.assertEqual(list(samples[0].columns), ['species']) - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=attributes, outputs=targets).value - self.assertEqual(list(log_likelihoods.columns), ['species']) - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=attributes, outputs=targets).value - self.assertEqual(list(log_likelihood.columns), 
['species']) - - self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -3.4919378757476807) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - def test_single_target_continue_fit(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_gbtree.XGBoostGBTreeClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_gbtree.XGBoostGBTreeClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - # reset the training data to make continue_fit() work. - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.continue_fit() - params = primitive.get_params() - self.assertEqual(params['booster'].best_ntree_limit, - primitive.hyperparams['n_estimators'] + primitive.hyperparams['n_more_estimators']) - predictions = primitive.produce(inputs=attributes).value - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - self._test_single_target_metadata(predictions.metadata) - - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=attributes, outputs=targets).value - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=attributes, outputs=targets).value - - self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -2.4149818420410156) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - def _test_single_target_metadata(self, predictions_metadata): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': 
['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 1, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }] - - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), expected_metadata) - - def test_multiple_targets(self): - dataframe, attributes, targets = self._get_iris_columns() - - targets = targets.append_columns(targets) - self.assertEqual(list(targets.columns), ['species', 'species']) - - hyperparams_class = \ - xgboost_gbtree.XGBoostGBTreeClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_gbtree.XGBoostGBTreeClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - predictions = primitive.produce(inputs=attributes).value - self.assertEqual(list(predictions.columns), ['species', 'species']) - - self.assertEqual(predictions.shape, (150, 2)) - for column_index in range(2): - self.assertEqual(predictions.iloc[0, column_index], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(column_index)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(column_index)['custom_metadata'], 'targets') - - samples = primitive.sample(inputs=attributes).value - self.assertEqual(list(samples[0].columns), ['species', 'species']) - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 2)) - for column_index in range(2): - self.assertEqual(samples[0].iloc[0, column_index], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(column_index)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(column_index)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=attributes, outputs=targets).value - self.assertEqual(list(log_likelihoods.columns), ['species', 'species']) - - self.assertEqual(log_likelihoods.shape, (150, 2)) - for column_index in range(2): - self.assertEqual(log_likelihoods.metadata.query_column(column_index)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=attributes, outputs=targets).value - - self.assertEqual(list(log_likelihood.columns), ['species', 'species']) - self.assertEqual(log_likelihood.shape, (1, 2)) - for column_index in range(2): - self.assertAlmostEqual(log_likelihood.iloc[0, column_index], 
-3.4919378757476807) - self.assertEqual(log_likelihoods.metadata.query_column(column_index)['name'], 'species') - - feature_importances = primitive.produce_feature_importances().value - self.assertEqual(list(feature_importances), ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth']) - self.assertEqual(feature_importances.metadata.query_column(0)['name'], 'sepalLength') - self.assertEqual(feature_importances.metadata.query_column(1)['name'], 'sepalWidth') - self.assertEqual(feature_importances.metadata.query_column(2)['name'], 'petalLength') - self.assertEqual(feature_importances.metadata.query_column(3)['name'], 'petalWidth') - - self.assertEqual(feature_importances.values.tolist(), [[0.012397459708154202, - 0.03404613956809044, - 0.5992223024368286, - 0.35433411598205566, - ]]) - - def test_multiple_targets_continue_fit(self): - dataframe, attributes, targets = self._get_iris_columns() - second_targets = targets.copy() - second_targets['species'] = targets['species'].map( - {'Iris-setosa': 't-Iris-setosa', 'Iris-versicolor': 't-Iris-versicolor', - 'Iris-virginica': 't-Iris-virginica'}) - second_targets.rename(columns={'species': 't-species'}, inplace=True) - second_targets.metadata = second_targets.metadata.update_column(0, {'name': 't-species'}) - targets = targets.append_columns(second_targets) - hyperparams_class = \ - xgboost_gbtree.XGBoostGBTreeClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_gbtree.XGBoostGBTreeClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.continue_fit() - params = primitive.get_params() - for estimator in params['estimators']: - self.assertEqual(estimator.get_booster().best_ntree_limit, - primitive.hyperparams['n_estimators'] + primitive.hyperparams['n_more_estimators']) - - predictions = primitive.produce(inputs=attributes).value - - - self.assertEqual(predictions.shape, (150, 2)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - self.assertEqual(predictions.iloc[0, 1], 't-Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 't-species') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 2)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 
'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - self.assertEqual(samples[0].iloc[0, 1], 't-Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(1)['name'], 't-species') - self.assertEqual(samples[0].metadata.query_column(1)['custom_metadata'], 'targets') - log_likelihoods = primitive.log_likelihoods(inputs=attributes, outputs=targets).value - - self.assertEqual(log_likelihoods.shape, (150, 2)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - self.assertEqual(log_likelihoods.metadata.query_column(1)['name'], 't-species') - - log_likelihood = primitive.log_likelihood(inputs=attributes, outputs=targets).value - - self.assertEqual(log_likelihood.shape, (1, 2)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -2.4149818420410156) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - self.assertAlmostEqual(log_likelihood.iloc[0, 1], -2.4149818420410156) - self.assertEqual(log_likelihoods.metadata.query_column(1)['name'], 't-species') - - feature_importances = primitive.produce_feature_importances().value - - self.assertEqual(feature_importances.values.tolist(), - [[0.011062598787248135, - 0.026943154633045197, - 0.6588393449783325, - 0.3031548857688904]]) - - def test_semantic_types(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_gbtree.XGBoostGBTreeClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_gbtree.XGBoostGBTreeClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - self.assertEqual(list(predictions.columns), ['species']) - - self.assertEqual(predictions.shape, (150, 1)) - self.assertEqual(predictions.iloc[0, 0], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - samples = primitive.sample(inputs=dataframe).value - self.assertEqual(list(samples[0].columns), ['species']) - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertEqual(samples[0].iloc[0, 0], 'Iris-setosa') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - 
self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'species') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - log_likelihoods = primitive.log_likelihoods(inputs=dataframe, outputs=dataframe).value - self.assertEqual(list(log_likelihoods.columns), ['species']) - - self.assertEqual(log_likelihoods.shape, (150, 1)) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - log_likelihood = primitive.log_likelihood(inputs=dataframe, outputs=dataframe).value - self.assertEqual(list(log_likelihood.columns), ['species']) - - self.assertEqual(log_likelihood.shape, (1, 1)) - self.assertAlmostEqual(log_likelihood.iloc[0, 0], -3.4919378757476807) - self.assertEqual(log_likelihoods.metadata.query_column(0)['name'], 'species') - - feature_importances = primitive.produce_feature_importances().value - self.assertEqual(list(feature_importances), ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth']) - self.assertEqual(feature_importances.metadata.query_column(0)['name'], 'sepalLength') - self.assertEqual(feature_importances.metadata.query_column(1)['name'], 'sepalWidth') - self.assertEqual(feature_importances.metadata.query_column(2)['name'], 'petalLength') - self.assertEqual(feature_importances.metadata.query_column(3)['name'], 'petalWidth') - - - self.assertEqual(feature_importances.values.tolist(), - [[0.012397459708154202, - 0.03404613956809044, - 0.5992223024368286, - 0.35433411598205566]]) - - def test_return_append(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_gbtree.XGBoostGBTreeClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_gbtree.XGBoostGBTreeClassifierPrimitive(hyperparams=hyperparams_class.defaults()) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'sepalLength', - 'sepalWidth', - 'petalLength', - 'petalWidth', - 'species', - 'species', - ]) - - self.assertEqual(predictions.shape, (150, 7)) - self.assertEqual(predictions.iloc[0, 6], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 6), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 6), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(6)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(6)['custom_metadata'], 'targets') - - self._test_return_append_metadata(predictions.metadata) - - def _test_return_append_metadata(self, predictions_metadata): - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 
'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 7, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'sepalLength', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'sepalWidth', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 3], - 'metadata': { - 'name': 'petalLength', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 4], - 'metadata': { - 'name': 'petalWidth', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 5], - 'metadata': { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget'], - 'custom_metadata': 'targets', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 6], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }]) - - def test_return_new(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_gbtree.XGBoostGBTreeClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_gbtree.XGBoostGBTreeClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new'})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'species', - ]) - - self.assertEqual(predictions.shape, (150, 2)) - self.assertEqual(predictions.iloc[0, 1], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - 
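-        # The helper below converts the DataFrame's metadata to a plain JSON
-        # structure and checks it entry by entry against the expected
-        # selector/metadata pairs.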
self._test_return_new_metadata(predictions.metadata) - - def _test_return_new_metadata(self, predictions_metadata): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 2, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }] - - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), expected_metadata) - - def test_return_replace(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_gbtree.XGBoostGBTreeClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_gbtree.XGBoostGBTreeClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace'})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(list(predictions.columns), [ - 'd3mIndex', - 'species', - 'species', - ]) - - self.assertEqual(predictions.shape, (150, 3)) - self.assertEqual(predictions.iloc[0, 1], 'Iris-setosa') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 'species') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - self._test_return_replace_metadata(predictions.metadata) - - def test_pickle_unpickle(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_gbtree.XGBoostGBTreeClassifierPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_gbtree.XGBoostGBTreeClassifierPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - before_pickled_prediction = primitive.produce(inputs=attributes).value - pickle_object = pickle.dumps(primitive) - primitive = pickle.loads(pickle_object) - after_unpickled_prediction = primitive.produce(inputs=attributes).value - # try 
to pickle again to see if we load it properly - _ = pickle.dumps(primitive) - self.assertTrue(container.DataFrame.equals(before_pickled_prediction, after_unpickled_prediction)) - - def _test_return_replace_metadata(self, predictions_metadata): - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'str', - 'name': 'species', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget'], - 'custom_metadata': 'targets', - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/test_xgboost_regressor.py b/common-primitives/tests/test_xgboost_regressor.py deleted file mode 100644 index d513cc1..0000000 --- a/common-primitives/tests/test_xgboost_regressor.py +++ /dev/null @@ -1,617 +0,0 @@ -import os -import pickle -import unittest - -from sklearn.metrics import mean_squared_error - -from d3m import container, utils -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe, extract_columns_semantic_types, xgboost_regressor, column_parser - - -class XGBoostRegressorTestCase(unittest.TestCase): - def _get_iris(self): - dataset_doc_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - - hyperparams_class = \ - dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=hyperparams_class.defaults()) - - dataframe = primitive.produce(inputs=dataset).value - - return dataframe - - def _get_iris_columns(self): - dataframe = self._get_iris() - col_index_list = list(range(len(dataframe.columns))) - _, target = col_index_list.pop(0), col_index_list.pop(3) - original_target_col = 5 - # We set custom metadata on columns. 
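-        # update_column merges the given dict into a column's existing
-        # metadata, so 'custom_metadata' is stored alongside the name and
-        # semantic types that were set when the dataset was loaded.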
- for column_index in col_index_list: - dataframe.metadata = dataframe.metadata.update_column(column_index, {'custom_metadata': 'attributes'}) - dataframe.metadata = dataframe.metadata.update_column(target, {'custom_metadata': 'targets'}) - dataframe.metadata = dataframe.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, target), - 'https://metadata.datadrivendiscovery.org/types/Attribute') - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, original_target_col), - 'https://metadata.datadrivendiscovery.org/types/Attribute') - # We set semantic types like runtime would. - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, target), - 'https://metadata.datadrivendiscovery.org/types/Target') - dataframe.metadata = dataframe.metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, target), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget') - dataframe.metadata = dataframe.metadata.remove_semantic_type((metadata_base.ALL_ELEMENTS, target), 'https://metadata.datadrivendiscovery.org/types/Attribute') - - # Parsing. - hyperparams_class = \ - column_parser.ColumnParserPrimitive.metadata.query()['primitive_code']['class_type_arguments'][ - 'Hyperparams'] - primitive = column_parser.ColumnParserPrimitive(hyperparams=hyperparams_class.defaults()) - dataframe = primitive.produce(inputs=dataframe).value - - hyperparams_class = \ - extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - {'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Attribute',)})) - attributes = primitive.produce(inputs=dataframe).value - - primitive = extract_columns_semantic_types.ExtractColumnsBySemanticTypesPrimitive( - hyperparams=hyperparams_class.defaults().replace( - {'semantic_types': ('https://metadata.datadrivendiscovery.org/types/TrueTarget',)})) - targets = primitive.produce(inputs=dataframe).value - - return dataframe, attributes, targets - - def test_single_target(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_regressor.XGBoostGBTreeRegressorPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - primitive = xgboost_regressor.XGBoostGBTreeRegressorPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - predictions = primitive.produce(inputs=attributes).value - mse = mean_squared_error(targets, predictions) - self.assertLessEqual(mse, 0.01) - self.assertEqual(predictions.shape, (150, 1)) - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'petalWidth') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - self._test_single_target_metadata(predictions.metadata) - - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, 
(150, 1)) - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'petalWidth') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - def test_single_target_continue(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_regressor.XGBoostGBTreeRegressorPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_regressor.XGBoostGBTreeRegressorPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - # reset the training data to make continue_fit() work. - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.continue_fit() - params = primitive.get_params() - self.assertEqual(params['booster'].best_ntree_limit, - primitive.hyperparams['n_estimators'] + primitive.hyperparams['n_more_estimators']) - predictions = primitive.produce(inputs=attributes).value - mse = mean_squared_error(targets, predictions) - self.assertLessEqual(mse, 0.01) - self.assertEqual(predictions.shape, (150, 1)) - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'petalWidth') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - self._test_single_target_metadata(predictions.metadata) - - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'petalWidth') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - def _test_single_target_metadata(self, predictions_metadata): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 1, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'structural_type': 'float', - 'name': 'petalWidth', - 'semantic_types': ['http://schema.org/Float', - 
'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }] - - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), expected_metadata) - - def test_multiple_targets(self): - dataframe, attributes, targets = self._get_iris_columns() - - targets = targets.append_columns(targets) - - hyperparams_class = \ - xgboost_regressor.XGBoostGBTreeRegressorPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - primitive = xgboost_regressor.XGBoostGBTreeRegressorPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - predictions = primitive.produce(inputs=attributes).value - mse = mean_squared_error(targets, predictions) - self.assertLessEqual(mse, 0.01) - - self.assertEqual(predictions.shape, (150, 2)) - for column_index in range(2): - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(column_index)['name'], 'petalWidth') - self.assertEqual(predictions.metadata.query_column(column_index)['custom_metadata'], 'targets') - - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 2)) - for column_index in range(2): - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, column_index), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(column_index)['name'], 'petalWidth') - self.assertEqual(samples[0].metadata.query_column(column_index)['custom_metadata'], 'targets') - - feature_importances = primitive.produce_feature_importances().value - - self.assertEqual(feature_importances.values.tolist(), - [[0.0049971588887274265, - 0.006304567214101553, - 0.27505698800086975, - 0.7136412858963013]]) - - def test_multiple_targets_continue(self): - dataframe, attributes, targets = self._get_iris_columns() - second_targets = targets.copy() - second_targets.rename(columns={'petalWidth': 't-petalWidth'}, inplace=True) - second_targets.metadata = second_targets.metadata.update_column(0, {'name': 't-petalWidth'}) - targets = targets.append_columns(second_targets) - - hyperparams_class = \ - xgboost_regressor.XGBoostGBTreeRegressorPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - primitive = xgboost_regressor.XGBoostGBTreeRegressorPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - # Set training data again to make continue_fit work - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.continue_fit() - params = primitive.get_params() - for estimator in params['estimators']: - 
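-            # After continue_fit() each per-target booster holds the trees from
-            # the initial fit() plus the newly added ones, which is what
-            # best_ntree_limit reports.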
self.assertEqual(estimator.get_booster().best_ntree_limit, - primitive.hyperparams['n_estimators'] + primitive.hyperparams['n_more_estimators']) - - predictions = primitive.produce(inputs=attributes).value - mse = mean_squared_error(targets, predictions) - self.assertLessEqual(mse, 0.01) - self.assertEqual(predictions.shape, (150, 2)) - - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(0)['name'], 'petalWidth') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 't-petalWidth') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 2)) - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'petalWidth') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(1)['name'], 't-petalWidth') - self.assertEqual(samples[0].metadata.query_column(1)['custom_metadata'], 'targets') - - feature_importances = primitive.produce_feature_importances().value - - self.assertEqual(feature_importances.values.tolist(), - [[0.003233343129977584, - 0.003926052246242762, - 0.19553671777248383, - 0.7973038554191589]]) - - def test_semantic_types(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_regressor.XGBoostGBTreeRegressorPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - primitive = xgboost_regressor.XGBoostGBTreeRegressorPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(predictions.shape, (150, 1)) - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) 
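-        # With inputs=outputs=dataframe the primitive selects the target
-        # column itself via the TrueTarget semantic type, so the lone output
-        # column keeps the 'petalWidth' name and metadata.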
- self.assertEqual(predictions.metadata.query_column(0)['name'], 'petalWidth') - self.assertEqual(predictions.metadata.query_column(0)['custom_metadata'], 'targets') - - samples = primitive.sample(inputs=attributes).value - - self.assertEqual(len(samples), 1) - self.assertEqual(samples[0].shape, (150, 1)) - self.assertTrue(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(samples[0].metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 0), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(samples[0].metadata.query_column(0)['name'], 'petalWidth') - self.assertEqual(samples[0].metadata.query_column(0)['custom_metadata'], 'targets') - - feature_importances = primitive.produce_feature_importances().value - - self.assertEqual(feature_importances.values.tolist(), - [[0.0049971588887274265, - 0.006304567214101553, - 0.27505698800086975, - 0.7136412858963013]]) - - def test_return_append(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_regressor.XGBoostGBTreeRegressorPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - primitive = xgboost_regressor.XGBoostGBTreeRegressorPrimitive(hyperparams=hyperparams_class.defaults()) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(predictions.shape, (150, 7)) - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 6), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 6), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(6)['name'], 'petalWidth') - self.assertEqual(predictions.metadata.query_column(6)['custom_metadata'], 'targets') - - self._test_return_append_metadata(predictions.metadata) - - def _test_return_append_metadata(self, predictions_metadata): - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 7, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'name': 'sepalLength', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'sepalWidth', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 
'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 3], - 'metadata': { - 'name': 'petalLength', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute'], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 4], - 'metadata': { - 'name': 'petalWidth', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget'], - 'custom_metadata': 'targets', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 5], - 'metadata': { - 'name': 'species', - 'structural_type': 'int', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', ], - 'custom_metadata': 'attributes', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 6], - 'metadata': { - 'structural_type': 'float', - 'name': 'petalWidth', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }]) - - def test_return_new(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_regressor.XGBoostGBTreeRegressorPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - primitive = xgboost_regressor.XGBoostGBTreeRegressorPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new'})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(predictions.shape, (150, 2)) - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 'petalWidth') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - self._test_return_new_metadata(predictions.metadata) - - def _test_return_new_metadata(self, predictions_metadata): - expected_metadata = [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 2, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'float', - 'name': 
'petalWidth', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }] - - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), expected_metadata) - - def test_return_replace(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_regressor.XGBoostGBTreeRegressorPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments']['Hyperparams'] - primitive = xgboost_regressor.XGBoostGBTreeRegressorPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'replace'})) - - primitive.set_training_data(inputs=dataframe, outputs=dataframe) - primitive.fit() - - predictions = primitive.produce(inputs=dataframe).value - - self.assertEqual(predictions.shape, (150, 3)) - self.assertTrue(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/PredictedTarget')) - self.assertFalse(predictions.metadata.has_semantic_type((metadata_base.ALL_ELEMENTS, 1), - 'https://metadata.datadrivendiscovery.org/types/TrueTarget')) - self.assertEqual(predictions.metadata.query_column(1)['name'], 'petalWidth') - self.assertEqual(predictions.metadata.query_column(1)['custom_metadata'], 'targets') - - self._test_return_replace_metadata(predictions.metadata) - - def test_pickle_unpickle(self): - dataframe, attributes, targets = self._get_iris_columns() - - hyperparams_class = \ - xgboost_regressor.XGBoostGBTreeRegressorPrimitive.metadata.query()['primitive_code'][ - 'class_type_arguments'][ - 'Hyperparams'] - primitive = xgboost_regressor.XGBoostGBTreeRegressorPrimitive( - hyperparams=hyperparams_class.defaults().replace({'return_result': 'new', 'add_index_columns': False})) - - primitive.set_training_data(inputs=attributes, outputs=targets) - primitive.fit() - - before_pickled_prediction = primitive.produce(inputs=attributes).value - pickle_object = pickle.dumps(primitive) - primitive = pickle.loads(pickle_object) - after_unpickled_prediction = primitive.produce(inputs=attributes).value - self.assertTrue(container.DataFrame.equals(before_pickled_prediction, after_unpickled_prediction)) - - def _test_return_replace_metadata(self, predictions_metadata): - self.assertEqual(utils.to_json_structure(predictions_metadata.to_internal_simple_structure()), [{ - 'selector': [], - 'metadata': { - 'structural_type': 'd3m.container.pandas.DataFrame', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - }, - 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/container.json', - }, - }, { - 'selector': ['__ALL_ELEMENTS__'], - 'metadata': { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 3, - }, - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 0], - 'metadata': { - 'name': 'd3mIndex', - 'structural_type': 'int', - 'semantic_types': ['http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey'], - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 1], - 'metadata': { - 'structural_type': 'float', - 'name': 'petalWidth', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Target', - 
'https://metadata.datadrivendiscovery.org/types/PredictedTarget'], - 'custom_metadata': 'targets', - }, - }, { - 'selector': ['__ALL_ELEMENTS__', 2], - 'metadata': { - 'name': 'petalWidth', - 'structural_type': 'float', - 'semantic_types': ['http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Target', - 'https://metadata.datadrivendiscovery.org/types/TrueTarget'], - 'custom_metadata': 'targets', - }, - }]) - - -if __name__ == '__main__': - unittest.main() diff --git a/common-primitives/tests/utils.py b/common-primitives/tests/utils.py deleted file mode 100644 index 18dc51c..0000000 --- a/common-primitives/tests/utils.py +++ /dev/null @@ -1,112 +0,0 @@ -import json -import os - -from d3m import utils, container -from d3m.metadata import base as metadata_base - -from common_primitives import dataset_to_dataframe - - -def convert_metadata(metadata): - return json.loads(json.dumps(metadata, cls=utils.JsonEncoder)) - - -def load_iris_metadata(): - dataset_doc_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data', 'datasets', 'iris_dataset_1', 'datasetDoc.json')) - dataset = container.Dataset.load('file://{dataset_doc_path}'.format(dataset_doc_path=dataset_doc_path)) - return dataset - - -def test_iris_metadata(test_obj, metadata, structural_type, rows_structural_type=None): - test_obj.maxDiff = None - - test_obj.assertEqual(convert_metadata(metadata.query(())), { - 'schema': metadata_base.CONTAINER_SCHEMA_VERSION, - 'structural_type': structural_type, - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/Table', - ], - 'dimension': { - 'name': 'rows', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'], - 'length': 150, - } - }) - - if rows_structural_type is None: - test_obj.assertEqual(convert_metadata(metadata.query((metadata_base.ALL_ELEMENTS,))), { - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 6, - } - }) - else: - test_obj.assertEqual(convert_metadata(metadata.query((metadata_base.ALL_ELEMENTS,))), { - 'structural_type': rows_structural_type, - 'dimension': { - 'name': 'columns', - 'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'], - 'length': 6, - } - }) - - test_obj.assertEqual(convert_metadata(metadata.query((metadata_base.ALL_ELEMENTS, 0))), { - 'name': 'd3mIndex', - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Integer', - 'https://metadata.datadrivendiscovery.org/types/PrimaryKey', - ], - }) - - for i in range(1, 5): - test_obj.assertEqual(convert_metadata(metadata.query((metadata_base.ALL_ELEMENTS, i))), { - 'name': ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth'][i - 1], - 'structural_type': 'str', - 'semantic_types': [ - 'http://schema.org/Float', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }, i) - - test_obj.assertEqual(convert_metadata(metadata.query((metadata_base.ALL_ELEMENTS, 5))), { - 'name': 'species', - 'structural_type': 'str', - 'semantic_types': [ - 'https://metadata.datadrivendiscovery.org/types/CategoricalData', - 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget', - 'https://metadata.datadrivendiscovery.org/types/Attribute', - ], - }) - - -def convert_through_json(data): - return json.loads(json.dumps(data, cls=utils.JsonEncoder)) - - -def normalize_semantic_types(data): - if isinstance(data, dict): - if 'semantic_types' in data: - # We sort them so that it is easier to compare them. 
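-        # The order of semantic types carries no meaning, so sorting them
-        # does not change what the metadata expresses.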
- data['semantic_types'] = sorted(data['semantic_types']) - - return {key: normalize_semantic_types(value) for key, value in data.items()} - - return data - - -def effective_metadata(metadata): - output = metadata.to_json_structure() - - for entry in output: - entry['metadata'] = normalize_semantic_types(entry['metadata']) - - return output - - -def get_dataframe(dataset): - dataset_hyperparams_class = dataset_to_dataframe.DatasetToDataFramePrimitive.metadata.get_hyperparams() - dataframe_primitive = dataset_to_dataframe.DatasetToDataFramePrimitive(hyperparams=dataset_hyperparams_class.defaults()) - dataframe = dataframe_primitive.produce(inputs=dataset).value - return dataframe diff --git a/entry_points.ini b/entry_points.ini index 6806df3..690abb7 100644 --- a/entry_points.ini +++ b/entry_points.ini @@ -1,79 +1,79 @@ [d3m.primitives] -tods.data_processing.dataset_to_dataframe = data_processing.DatasetToDataframe:DatasetToDataFramePrimitive -tods.data_processing.time_interval_transform = data_processing.TimeIntervalTransform:TimeIntervalTransform -tods.data_processing.categorical_to_binary = data_processing.CategoricalToBinary:CategoricalToBinary -tods.data_processing.column_filter = data_processing.ColumnFilter:ColumnFilter -tods.data_processing.timestamp_validation = data_processing.TimeStampValidation:TimeStampValidationPrimitive -tods.data_processing.duplication_validation = data_processing.DuplicationValidation:DuplicationValidation -tods.data_processing.continuity_validation = data_processing.ContinuityValidation:ContinuityValidation +tods.data_processing.dataset_to_dataframe = tods.data_processing.DatasetToDataframe:DatasetToDataFramePrimitive +tods.data_processing.time_interval_transform = tods.data_processing.TimeIntervalTransform:TimeIntervalTransform +tods.data_processing.categorical_to_binary = tods.data_processing.CategoricalToBinary:CategoricalToBinary +tods.data_processing.column_filter = tods.data_processing.ColumnFilter:ColumnFilter +tods.data_processing.timestamp_validation = tods.data_processing.TimeStampValidation:TimeStampValidationPrimitive +tods.data_processing.duplication_validation = tods.data_processing.DuplicationValidation:DuplicationValidation +tods.data_processing.continuity_validation = tods.data_processing.ContinuityValidation:ContinuityValidation -tods.timeseries_processing.transformation.axiswise_scaler=timeseries_processing.SKAxiswiseScaler:SKAxiswiseScaler -tods.timeseries_processing.transformation.standard_scaler=timeseries_processing.SKStandardScaler:SKStandardScaler -tods.timeseries_processing.transformation.power_transformer=timeseries_processing.SKPowerTransformer:SKPowerTransformer -tods.timeseries_processing.transformation.quantile_transformer=timeseries_processing.SKQuantileTransformer:SKQuantileTransformer -tods.timeseries_processing.transformation.moving_average_transform = timeseries_processing.MovingAverageTransform:MovingAverageTransform -tods.timeseries_processing.transformation.simple_exponential_smoothing = timeseries_processing.SimpleExponentialSmoothing:SimpleExponentialSmoothing -tods.timeseries_processing.transformation.holt_smoothing = timeseries_processing.HoltSmoothing:HoltSmoothing -tods.timeseries_processing.transformation.holt_winters_exponential_smoothing= timeseries_processing.HoltWintersExponentialSmoothing:HoltWintersExponentialSmoothing -tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition = 
timeseries_processing.TimeSeriesSeasonalityTrendDecomposition:TimeSeriesSeasonalityTrendDecompositionPrimitive +tods.timeseries_processing.transformation.axiswise_scaler = tods.timeseries_processing.SKAxiswiseScaler:SKAxiswiseScaler +tods.timeseries_processing.transformation.standard_scaler = tods.timeseries_processing.SKStandardScaler:SKStandardScaler +tods.timeseries_processing.transformation.power_transformer = tods.timeseries_processing.SKPowerTransformer:SKPowerTransformer +tods.timeseries_processing.transformation.quantile_transformer = tods.timeseries_processing.SKQuantileTransformer:SKQuantileTransformer +tods.timeseries_processing.transformation.moving_average_transform = tods.timeseries_processing.MovingAverageTransform:MovingAverageTransform +tods.timeseries_processing.transformation.simple_exponential_smoothing = tods.timeseries_processing.SimpleExponentialSmoothing:SimpleExponentialSmoothing +tods.timeseries_processing.transformation.holt_smoothing = tods.timeseries_processing.HoltSmoothing:HoltSmoothing +tods.timeseries_processing.transformation.holt_winters_exponential_smoothing= tods.timeseries_processing.HoltWintersExponentialSmoothing:HoltWintersExponentialSmoothing +tods.timeseries_processing.decomposition.time_series_seasonality_trend_decomposition = tods.timeseries_processing.TimeSeriesSeasonalityTrendDecomposition:TimeSeriesSeasonalityTrendDecompositionPrimitive -tods.feature_analysis.auto_correlation = feature_analysis.AutoCorrelation:AutoCorrelation -tods.feature_analysis.statistical_mean = feature_analysis.StatisticalMean:StatisticalMeanPrimitive -tods.feature_analysis.statistical_median = feature_analysis.StatisticalMedian:StatisticalMedianPrimitive -tods.feature_analysis.statistical_g_mean = feature_analysis.StatisticalGmean:StatisticalGmeanPrimitive -tods.feature_analysis.statistical_abs_energy = feature_analysis.StatisticalAbsEnergy:StatisticalAbsEnergyPrimitive -tods.feature_analysis.statistical_abs_sum = feature_analysis.StatisticalAbsSum:StatisticalAbsSumPrimitive -tods.feature_analysis.statistical_h_mean = feature_analysis.StatisticalHmean:StatisticalHmeanPrimitive -tods.feature_analysis.statistical_maximum = feature_analysis.StatisticalMaximum:StatisticalMaximumPrimitive -tods.feature_analysis.statistical_minimum = feature_analysis.StatisticalMinimum:StatisticalMinimumPrimitive -tods.feature_analysis.statistical_mean_abs = feature_analysis.StatisticalMeanAbs:StatisticalMeanAbsPrimitive -tods.feature_analysis.statistical_mean_abs_temporal_derivative = feature_analysis.StatisticalMeanAbsTemporalDerivative:StatisticalMeanAbsTemporalDerivativePrimitive -tods.feature_analysis.statistical_mean_temporal_derivative = feature_analysis.StatisticalMeanTemporalDerivative:StatisticalMeanTemporalDerivativePrimitive -tods.feature_analysis.statistical_median_abs_deviation = feature_analysis.StatisticalMedianAbsoluteDeviation:StatisticalMedianAbsoluteDeviationPrimitive -tods.feature_analysis.statistical_kurtosis = feature_analysis.StatisticalKurtosis:StatisticalKurtosisPrimitive -tods.feature_analysis.statistical_skew = feature_analysis.StatisticalSkew:StatisticalSkewPrimitive -tods.feature_analysis.statistical_std = feature_analysis.StatisticalStd:StatisticalStdPrimitive -tods.feature_analysis.statistical_var = feature_analysis.StatisticalVar:StatisticalVarPrimitive -tods.feature_analysis.statistical_variation = feature_analysis.StatisticalVariation:StatisticalVariationPrimitive -tods.feature_analysis.statistical_vec_sum = 
feature_analysis.StatisticalVecSum:StatisticalVecSumPrimitive -tods.feature_analysis.statistical_willison_amplitude = feature_analysis.StatisticalWillisonAmplitude:StatisticalWillisonAmplitudePrimitive -tods.feature_analysis.statistical_zero_crossing = feature_analysis.StatisticalZeroCrossing:StatisticalZeroCrossingPrimitive -tods.feature_analysis.spectral_residual_transform = feature_analysis.SpectralResidualTransform:SpectralResidualTransformPrimitive -tods.feature_analysis.fast_fourier_transform = feature_analysis.FastFourierTransform:FastFourierTransform -tods.feature_analysis.discrete_cosine_transform = feature_analysis.DiscreteCosineTransform:DiscreteCosineTransform -tods.feature_analysis.non_negative_matrix_factorization = feature_analysis.NonNegativeMatrixFactorization:NonNegativeMatrixFactorization -tods.feature_analysis.bk_filter = feature_analysis.BKFilter:BKFilter -tods.feature_analysis.hp_filter = feature_analysis.HPFilter:HPFilter -tods.feature_analysis.truncated_svd = feature_analysis.SKTruncatedSVD:SKTruncatedSVD -tods.feature_analysis.wavelet_transform = feature_analysis.WaveletTransform:WaveletTransformer -tods.feature_analysis.trmf = feature_analysis.TRMF:TRMF +tods.feature_analysis.auto_correlation = tods.feature_analysis.AutoCorrelation:AutoCorrelation +tods.feature_analysis.statistical_mean = tods.feature_analysis.StatisticalMean:StatisticalMeanPrimitive +tods.feature_analysis.statistical_median = tods.feature_analysis.StatisticalMedian:StatisticalMedianPrimitive +tods.feature_analysis.statistical_g_mean = tods.feature_analysis.StatisticalGmean:StatisticalGmeanPrimitive +tods.feature_analysis.statistical_abs_energy = tods.feature_analysis.StatisticalAbsEnergy:StatisticalAbsEnergyPrimitive +tods.feature_analysis.statistical_abs_sum = tods.feature_analysis.StatisticalAbsSum:StatisticalAbsSumPrimitive +tods.feature_analysis.statistical_h_mean = tods.feature_analysis.StatisticalHmean:StatisticalHmeanPrimitive +tods.feature_analysis.statistical_maximum = tods.feature_analysis.StatisticalMaximum:StatisticalMaximumPrimitive +tods.feature_analysis.statistical_minimum = tods.feature_analysis.StatisticalMinimum:StatisticalMinimumPrimitive +tods.feature_analysis.statistical_mean_abs = tods.feature_analysis.StatisticalMeanAbs:StatisticalMeanAbsPrimitive +tods.feature_analysis.statistical_mean_abs_temporal_derivative = tods.feature_analysis.StatisticalMeanAbsTemporalDerivative:StatisticalMeanAbsTemporalDerivativePrimitive +tods.feature_analysis.statistical_mean_temporal_derivative = tods.feature_analysis.StatisticalMeanTemporalDerivative:StatisticalMeanTemporalDerivativePrimitive +tods.feature_analysis.statistical_median_abs_deviation = tods.feature_analysis.StatisticalMedianAbsoluteDeviation:StatisticalMedianAbsoluteDeviationPrimitive +tods.feature_analysis.statistical_kurtosis = tods.feature_analysis.StatisticalKurtosis:StatisticalKurtosisPrimitive +tods.feature_analysis.statistical_skew = tods.feature_analysis.StatisticalSkew:StatisticalSkewPrimitive +tods.feature_analysis.statistical_std = tods.feature_analysis.StatisticalStd:StatisticalStdPrimitive +tods.feature_analysis.statistical_var = tods.feature_analysis.StatisticalVar:StatisticalVarPrimitive +tods.feature_analysis.statistical_variation = tods.feature_analysis.StatisticalVariation:StatisticalVariationPrimitive +tods.feature_analysis.statistical_vec_sum = tods.feature_analysis.StatisticalVecSum:StatisticalVecSumPrimitive +tods.feature_analysis.statistical_willison_amplitude = 
tods.feature_analysis.StatisticalWillisonAmplitude:StatisticalWillisonAmplitudePrimitive +tods.feature_analysis.statistical_zero_crossing = tods.feature_analysis.StatisticalZeroCrossing:StatisticalZeroCrossingPrimitive +tods.feature_analysis.spectral_residual_transform = tods.feature_analysis.SpectralResidualTransform:SpectralResidualTransformPrimitive +tods.feature_analysis.fast_fourier_transform = tods.feature_analysis.FastFourierTransform:FastFourierTransform +tods.feature_analysis.discrete_cosine_transform = tods.feature_analysis.DiscreteCosineTransform:DiscreteCosineTransform +tods.feature_analysis.non_negative_matrix_factorization = tods.feature_analysis.NonNegativeMatrixFactorization:NonNegativeMatrixFactorization +tods.feature_analysis.bk_filter = tods.feature_analysis.BKFilter:BKFilter +tods.feature_analysis.hp_filter = tods.feature_analysis.HPFilter:HPFilter +tods.feature_analysis.truncated_svd = tods.feature_analysis.SKTruncatedSVD:SKTruncatedSVD +tods.feature_analysis.wavelet_transform = tods.feature_analysis.WaveletTransform:WaveletTransformer +tods.feature_analysis.trmf = tods.feature_analysis.TRMF:TRMF -tods.detection_algorithm.pyod_ae = detection_algorithm.PyodAE:AutoEncoder -tods.detection_algorithm.pyod_vae = detection_algorithm.PyodVAE:VariationalAutoEncoder -tods.detection_algorithm.pyod_cof = detection_algorithm.PyodCOF:PyodCOF -tods.detection_algorithm.pyod_sod = detection_algorithm.PyodSOD:SODPrimitive -tods.detection_algorithm.pyod_abod = detection_algorithm.PyodABOD:ABODPrimitive -tods.detection_algorithm.pyod_hbos = detection_algorithm.PyodHBOS:HBOSPrimitive -tods.detection_algorithm.pyod_iforest = detection_algorithm.PyodIsolationForest:IsolationForest -tods.detection_algorithm.pyod_lof = detection_algorithm.PyodLOF:LOFPrimitive -tods.detection_algorithm.pyod_autoencoder = detection_algorithm.PyodAutoEncoder:AutoEncoderPrimitive -tods.detection_algorithm.pyod_knn = detection_algorithm.PyodKNN:KNNPrimitive -tods.detection_algorithm.pyod_ocsvm = detection_algorithm.PyodOCSVM:OCSVMPrimitive -tods.detection_algorithm.pyod_loda = detection_algorithm.PyodLODA:LODAPrimitive -tods.detection_algorithm.pyod_cblof = detection_algorithm.PyodCBLOF:CBLOFPrimitive -tods.detection_algorithm.pyod_sogaal = detection_algorithm.PyodSoGaal:So_GaalPrimitive -tods.detection_algorithm.pyod_mogaal = detection_algorithm.PyodMoGaal:Mo_GaalPrimitive +tods.detection_algorithm.pyod_ae = tods.detection_algorithm.PyodAE:AutoEncoder +tods.detection_algorithm.pyod_vae = tods.detection_algorithm.PyodVAE:VariationalAutoEncoder +tods.detection_algorithm.pyod_cof = tods.detection_algorithm.PyodCOF:PyodCOF +tods.detection_algorithm.pyod_sod = tods.detection_algorithm.PyodSOD:SODPrimitive +tods.detection_algorithm.pyod_abod = tods.detection_algorithm.PyodABOD:ABODPrimitive +tods.detection_algorithm.pyod_hbos = tods.detection_algorithm.PyodHBOS:HBOSPrimitive +tods.detection_algorithm.pyod_iforest = tods.detection_algorithm.PyodIsolationForest:IsolationForest +tods.detection_algorithm.pyod_lof = tods.detection_algorithm.PyodLOF:LOFPrimitive +tods.detection_algorithm.pyod_autoencoder = tods.detection_algorithm.PyodAutoEncoder:AutoEncoderPrimitive +tods.detection_algorithm.pyod_knn = tods.detection_algorithm.PyodKNN:KNNPrimitive +tods.detection_algorithm.pyod_ocsvm = tods.detection_algorithm.PyodOCSVM:OCSVMPrimitive +tods.detection_algorithm.pyod_loda = tods.detection_algorithm.PyodLODA:LODAPrimitive +tods.detection_algorithm.pyod_cblof = tods.detection_algorithm.PyodCBLOF:CBLOFPrimitive 
+tods.detection_algorithm.pyod_sogaal = tods.detection_algorithm.PyodSoGaal:So_GaalPrimitive +tods.detection_algorithm.pyod_mogaal = tods.detection_algorithm.PyodMoGaal:Mo_GaalPrimitive -tods.detection_algorithm.matrix_profile = detection_algorithm.MatrixProfile:MatrixProfile -tods.detection_algorithm.AutoRegODetector = detection_algorithm.AutoRegODetect:AutoRegODetector +tods.detection_algorithm.matrix_profile = tods.detection_algorithm.MatrixProfile:MatrixProfile +tods.detection_algorithm.AutoRegODetector = tods.detection_algorithm.AutoRegODetect:AutoRegODetector -tods.detection_algorithm.LSTMODetector = detection_algorithm.LSTMODetect:LSTMODetector -tods.detection_algorithm.AutoRegODetector = detection_algorithm.AutoRegODetect:AutoRegODetector -tods.detection_algorithm.PCAODetector = detection_algorithm.PCAODetect:PCAODetector -tods.detection_algorithm.KDiscordODetector = detection_algorithm.KDiscordODetect:KDiscordODetector -tods.detection_algorithm.deeplog = detection_algorithm.DeepLog:DeepLogPrimitive -tods.detection_algorithm.telemanom = detection_algorithm.Telemanom:TelemanomPrimitive +tods.detection_algorithm.LSTMODetector = tods.detection_algorithm.LSTMODetect:LSTMODetector +tods.detection_algorithm.AutoRegODetector = tods.detection_algorithm.AutoRegODetect:AutoRegODetector +tods.detection_algorithm.PCAODetector = tods.detection_algorithm.PCAODetect:PCAODetector +tods.detection_algorithm.KDiscordODetector = tods.detection_algorithm.KDiscordODetect:KDiscordODetector +tods.detection_algorithm.deeplog = tods.detection_algorithm.DeepLog:DeepLogPrimitive +tods.detection_algorithm.telemanom = tods.detection_algorithm.Telemanom:TelemanomPrimitive -tods.reinforcement.rule_filter = reinforcement.RuleBasedFilter:RuleBasedFilter +tods.reinforcement.rule_filter = tods.reinforcement.RuleBasedFilter:RuleBasedFilter diff --git a/entry_points_common.ini b/entry_points_common.ini new file mode 100644 index 0000000..6676944 --- /dev/null +++ b/entry_points_common.ini @@ -0,0 +1,63 @@ +[d3m.primitives] +data_preprocessing.one_hot_encoder.MakerCommon = tods.common_primitives.one_hot_maker:OneHotMakerPrimitive +data_preprocessing.one_hot_encoder.PandasCommon = tods.common_primitives.pandas_onehot_encoder:PandasOneHotEncoderPrimitive +data_transformation.extract_columns.Common = tods.common_primitives.extract_columns:ExtractColumnsPrimitive +data_transformation.extract_columns_by_semantic_types.Common = tods.common_primitives.extract_columns_semantic_types:ExtractColumnsBySemanticTypesPrimitive +data_transformation.extract_columns_by_structural_types.Common = tods.common_primitives.extract_columns_structural_types:ExtractColumnsByStructuralTypesPrimitive +data_transformation.remove_columns.Common = tods.common_primitives.remove_columns:RemoveColumnsPrimitive +data_transformation.remove_duplicate_columns.Common = tods.common_primitives.remove_duplicate_columns:RemoveDuplicateColumnsPrimitive +data_transformation.horizontal_concat.DataFrameCommon = tods.common_primitives.horizontal_concat:HorizontalConcatPrimitive +data_transformation.cast_to_type.Common = tods.common_primitives.cast_to_type:CastToTypePrimitive +data_transformation.column_parser.Common = tods.common_primitives.column_parser:ColumnParserPrimitive +data_transformation.construct_predictions.Common = tods.common_primitives.construct_predictions:ConstructPredictionsPrimitive +data_transformation.dataframe_to_ndarray.Common = tods.common_primitives.dataframe_to_ndarray:DataFrameToNDArrayPrimitive 
+data_transformation.ndarray_to_dataframe.Common = tods.common_primitives.ndarray_to_dataframe:NDArrayToDataFramePrimitive +data_transformation.dataframe_to_list.Common = tods.common_primitives.dataframe_to_list:DataFrameToListPrimitive +data_transformation.list_to_dataframe.Common = tods.common_primitives.list_to_dataframe:ListToDataFramePrimitive +data_transformation.ndarray_to_list.Common = tods.common_primitives.ndarray_to_list:NDArrayToListPrimitive +data_transformation.list_to_ndarray.Common = tods.common_primitives.list_to_ndarray:ListToNDArrayPrimitive +data_transformation.stack_ndarray_column.Common = tods.common_primitives.stack_ndarray_column:StackNDArrayColumnPrimitive +data_transformation.add_semantic_types.Common = tods.common_primitives.add_semantic_types:AddSemanticTypesPrimitive +data_transformation.remove_semantic_types.Common = tods.common_primitives.remove_semantic_types:RemoveSemanticTypesPrimitive +data_transformation.replace_semantic_types.Common = tods.common_primitives.replace_semantic_types:ReplaceSemanticTypesPrimitive +data_transformation.denormalize.Common = tods.common_primitives.denormalize:DenormalizePrimitive +data_transformation.datetime_field_compose.Common = tods.common_primitives.datetime_field_compose:DatetimeFieldComposePrimitive +data_transformation.grouping_field_compose.Common = tods.common_primitives.grouping_field_compose:GroupingFieldComposePrimitive +data_transformation.dataset_to_dataframe.Common = tods.common_primitives.dataset_to_dataframe:DatasetToDataFramePrimitive +data_transformation.cut_audio.Common = tods.common_primitives.cut_audio:CutAudioPrimitive +data_transformation.rename_duplicate_name.DataFrameCommon = tods.common_primitives.rename_duplicate_columns:RenameDuplicateColumnsPrimitive +#data_transformation.normalize_column_references.Common = tods.common_primitives.normalize_column_references:NormalizeColumnReferencesPrimitive +#data_transformation.normalize_graphs.Common = tods.common_primitives.normalize_graphs:NormalizeGraphsPrimitive +data_transformation.ravel.DataFrameRowCommon = tods.common_primitives.ravel:RavelAsRowPrimitive +data_preprocessing.label_encoder.Common = tods.common_primitives.unseen_label_encoder:UnseenLabelEncoderPrimitive +data_preprocessing.label_decoder.Common = tods.common_primitives.unseen_label_decoder:UnseenLabelDecoderPrimitive +data_preprocessing.image_reader.Common = tods.common_primitives.dataframe_image_reader:DataFrameImageReaderPrimitive +data_preprocessing.text_reader.Common = tods.common_primitives.text_reader:TextReaderPrimitive +data_preprocessing.video_reader.Common = tods.common_primitives.video_reader:VideoReaderPrimitive +data_preprocessing.csv_reader.Common = tods.common_primitives.csv_reader:CSVReaderPrimitive +data_preprocessing.audio_reader.Common = tods.common_primitives.audio_reader:AudioReaderPrimitive +data_preprocessing.regex_filter.Common = tods.common_primitives.regex_filter:RegexFilterPrimitive +data_preprocessing.term_filter.Common = tods.common_primitives.term_filter:TermFilterPrimitive +data_preprocessing.numeric_range_filter.Common = tods.common_primitives.numeric_range_filter:NumericRangeFilterPrimitive +data_preprocessing.datetime_range_filter.Common = tods.common_primitives.datetime_range_filter:DatetimeRangeFilterPrimitive +data_preprocessing.dataset_sample.Common = tods.common_primitives.dataset_sample:DatasetSamplePrimitive +#data_preprocessing.time_interval_transform.Common = tods.common_primitives.time_interval_transform:TimeIntervalTransformPrimitive 
diff --git a/setup.py b/setup.py
index 80506cd..04ce8e3 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ def read_file_entry_points(fname):
     return entry_points.read()
 
 def merge_entry_points():
-    entry_list = ['entry_points.ini']
+    entry_list = ['entry_points.ini', 'entry_points_common.ini']
     merge_entry = []
     for entry_name in entry_list:
         entry_point = read_file_entry_points(entry_name).replace(' ', '')
@@ -29,6 +29,7 @@ setup(
     install_requires=[
         'd3m',
         'Jinja2',
+        'GitPython==3.1.0',
         'simplejson==3.12.0',
         'scikit-learn==0.22.0',
         'statsmodels==0.11.1',
@@ -38,7 +39,9 @@
         'pyod',
         'nimfa==1.4.0',
         'stumpy==1.4.0',
-        'more-itertools==8.5.0'
+        'more-itertools==8.5.0',
+        'gitdb2==2.0.6',
+        'gitdb==0.6.4'
     ],
 
     entry_points = merge_entry_points()
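The first setup.py hunk extends merge_entry_points so the common-primitives registrations in entry_points_common.ini are folded into the same entry_points argument passed to setup(). The patch shows the function only partially; judging by the visible .replace(' ', '') it splices the ini texts together as strings. A hedged reconstruction of the same merge using configparser instead (details may differ from the actual setup.py):

import configparser

def merge_entry_points(entry_list=("entry_points.ini", "entry_points_common.ini")):
    # Combine several ini files into the {group: ["name = module:attr", ...]}
    # mapping that setuptools accepts for entry_points. strict=False
    # tolerates a name repeated across (or within) the files, with the
    # last occurrence winning.
    parser = configparser.ConfigParser(delimiters=("=",), strict=False)
    parser.optionxform = str  # keep entry-point names case-sensitive
    for entry_name in entry_list:
        parser.read(entry_name)  # later files extend the same sections
    return {
        section: [f"{name} = {value}" for name, value in parser[section].items()]
        for section in parser.sections()
    }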
diff --git a/test.sh b/test.sh
index af60b73..51587ff 100644
--- a/test.sh
+++ b/test.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-#test_scripts=$(ls tests)
-test_scripts=$(ls tests | grep -v -f tested_file.txt)
+test_scripts=$(ls tests)
+#test_scripts=$(ls tests | grep -v -f tested_file.txt)
 
 for file in $test_scripts
 do
diff --git a/common-primitives/common_primitives/__init__.py b/tods/common_primitives/__init__.py
similarity index 100%
rename from common-primitives/common_primitives/__init__.py
rename to tods/common_primitives/__init__.py
diff --git a/common-primitives/common_primitives/add_semantic_types.py b/tods/common_primitives/add_semantic_types.py
similarity index 100%
rename from common-primitives/common_primitives/add_semantic_types.py
rename to tods/common_primitives/add_semantic_types.py
diff --git a/common-primitives/common_primitives/audio_reader.py b/tods/common_primitives/audio_reader.py
similarity index 100%
rename from common-primitives/common_primitives/audio_reader.py
rename to tods/common_primitives/audio_reader.py
diff --git a/common-primitives/common_primitives/base.py b/tods/common_primitives/base.py
similarity index 100%
rename from common-primitives/common_primitives/base.py
rename to tods/common_primitives/base.py
diff --git a/common-primitives/common_primitives/cast_to_type.py b/tods/common_primitives/cast_to_type.py
similarity index 100%
rename from common-primitives/common_primitives/cast_to_type.py
rename to tods/common_primitives/cast_to_type.py
diff --git a/common-primitives/common_primitives/column_map.py b/tods/common_primitives/column_map.py
similarity index 100%
rename from common-primitives/common_primitives/column_map.py
rename to tods/common_primitives/column_map.py
diff --git a/common-primitives/common_primitives/column_parser.py b/tods/common_primitives/column_parser.py
similarity index 100%
rename from common-primitives/common_primitives/column_parser.py
rename to tods/common_primitives/column_parser.py
diff --git a/common-primitives/common_primitives/compute_metafeatures.py b/tods/common_primitives/compute_metafeatures.py
similarity index 100%
rename from common-primitives/common_primitives/compute_metafeatures.py
rename to tods/common_primitives/compute_metafeatures.py
diff --git a/common-primitives/common_primitives/construct_predictions.py b/tods/common_primitives/construct_predictions.py
similarity index 100%
rename from common-primitives/common_primitives/construct_predictions.py
rename to tods/common_primitives/construct_predictions.py
diff --git a/common-primitives/common_primitives/csv_reader.py b/tods/common_primitives/csv_reader.py
similarity index 100%
rename from common-primitives/common_primitives/csv_reader.py
rename to tods/common_primitives/csv_reader.py
diff --git a/common-primitives/common_primitives/cut_audio.py b/tods/common_primitives/cut_audio.py
similarity index 100%
rename from common-primitives/common_primitives/cut_audio.py
rename to tods/common_primitives/cut_audio.py
diff --git a/common-primitives/common_primitives/dataframe_flatten.py b/tods/common_primitives/dataframe_flatten.py
similarity index 100%
rename from common-primitives/common_primitives/dataframe_flatten.py
rename to tods/common_primitives/dataframe_flatten.py
diff --git a/common-primitives/common_primitives/dataframe_image_reader.py b/tods/common_primitives/dataframe_image_reader.py
similarity index 100%
rename from common-primitives/common_primitives/dataframe_image_reader.py
rename to tods/common_primitives/dataframe_image_reader.py
diff --git a/common-primitives/common_primitives/dataframe_to_list.py b/tods/common_primitives/dataframe_to_list.py
similarity index 100%
rename from common-primitives/common_primitives/dataframe_to_list.py
rename to tods/common_primitives/dataframe_to_list.py
diff --git a/common-primitives/common_primitives/dataframe_to_ndarray.py b/tods/common_primitives/dataframe_to_ndarray.py
similarity index 100%
rename from common-primitives/common_primitives/dataframe_to_ndarray.py
rename to tods/common_primitives/dataframe_to_ndarray.py
diff --git a/common-primitives/common_primitives/dataframe_utils.py b/tods/common_primitives/dataframe_utils.py
similarity index 100%
rename from common-primitives/common_primitives/dataframe_utils.py
rename to tods/common_primitives/dataframe_utils.py
diff --git a/common-primitives/common_primitives/datamart_augment.py b/tods/common_primitives/datamart_augment.py
similarity index 100%
rename from common-primitives/common_primitives/datamart_augment.py
rename to tods/common_primitives/datamart_augment.py
diff --git a/common-primitives/common_primitives/datamart_download.py b/tods/common_primitives/datamart_download.py
similarity index 100%
rename from common-primitives/common_primitives/datamart_download.py
rename to tods/common_primitives/datamart_download.py
diff --git a/common-primitives/common_primitives/dataset_map.py b/tods/common_primitives/dataset_map.py
similarity index 100%
rename from common-primitives/common_primitives/dataset_map.py
rename to tods/common_primitives/dataset_map.py
diff --git a/common-primitives/common_primitives/dataset_sample.py b/tods/common_primitives/dataset_sample.py
similarity index 100%
rename from common-primitives/common_primitives/dataset_sample.py
rename to tods/common_primitives/dataset_sample.py
diff --git a/common-primitives/common_primitives/dataset_to_dataframe.py b/tods/common_primitives/dataset_to_dataframe.py
similarity index 99%
rename from common-primitives/common_primitives/dataset_to_dataframe.py
rename to tods/common_primitives/dataset_to_dataframe.py
index 4f8abe3..a499e8c 100644
--- a/common-primitives/common_primitives/dataset_to_dataframe.py
+++ b/tods/common_primitives/dataset_to_dataframe.py
@@ -6,7 +6,7 @@
 from d3m.base import utils as base_utils
 from d3m.metadata import base as metadata_base, hyperparams
 from d3m.primitive_interfaces import base, transformer
 import logging
-import common_primitives
+import common_primitives
 
 __all__ = ('DatasetToDataFramePrimitive',)
diff --git a/common-primitives/common_primitives/dataset_utils.py b/tods/common_primitives/dataset_utils.py
similarity index 100%
rename from common-primitives/common_primitives/dataset_utils.py
rename to tods/common_primitives/dataset_utils.py
diff --git a/common-primitives/common_primitives/datetime_field_compose.py b/tods/common_primitives/datetime_field_compose.py
similarity index 100%
rename from common-primitives/common_primitives/datetime_field_compose.py
rename to tods/common_primitives/datetime_field_compose.py
diff --git a/common-primitives/common_primitives/datetime_range_filter.py b/tods/common_primitives/datetime_range_filter.py
similarity index 100%
rename from common-primitives/common_primitives/datetime_range_filter.py
rename to tods/common_primitives/datetime_range_filter.py
diff --git a/common-primitives/common_primitives/denormalize.py b/tods/common_primitives/denormalize.py
similarity index 100%
rename from common-primitives/common_primitives/denormalize.py
rename to tods/common_primitives/denormalize.py
diff --git a/common-primitives/common_primitives/extract_columns.py b/tods/common_primitives/extract_columns.py
similarity index 100%
rename from common-primitives/common_primitives/extract_columns.py
rename to tods/common_primitives/extract_columns.py
diff --git a/common-primitives/common_primitives/extract_columns_semantic_types.py b/tods/common_primitives/extract_columns_semantic_types.py
similarity index 100%
rename from common-primitives/common_primitives/extract_columns_semantic_types.py
rename to tods/common_primitives/extract_columns_semantic_types.py
diff --git a/common-primitives/common_primitives/extract_columns_structural_types.py b/tods/common_primitives/extract_columns_structural_types.py
similarity index 100%
rename from common-primitives/common_primitives/extract_columns_structural_types.py
rename to tods/common_primitives/extract_columns_structural_types.py
diff --git a/common-primitives/common_primitives/fixed_split.py b/tods/common_primitives/fixed_split.py
similarity index 100%
rename from common-primitives/common_primitives/fixed_split.py
rename to tods/common_primitives/fixed_split.py
diff --git a/common-primitives/common_primitives/grouping_field_compose.py b/tods/common_primitives/grouping_field_compose.py
similarity index 100%
rename from common-primitives/common_primitives/grouping_field_compose.py
rename to tods/common_primitives/grouping_field_compose.py
diff --git a/common-primitives/common_primitives/holt_smoothing.py b/tods/common_primitives/holt_smoothing.py
similarity index 100%
rename from common-primitives/common_primitives/holt_smoothing.py
rename to tods/common_primitives/holt_smoothing.py
diff --git a/common-primitives/common_primitives/holt_winters_exponential_smoothing.py b/tods/common_primitives/holt_winters_exponential_smoothing.py
similarity index 100%
rename from common-primitives/common_primitives/holt_winters_exponential_smoothing.py
rename to tods/common_primitives/holt_winters_exponential_smoothing.py
diff --git a/common-primitives/common_primitives/horizontal_concat.py b/tods/common_primitives/horizontal_concat.py
similarity index 100%
rename from common-primitives/common_primitives/horizontal_concat.py
rename to tods/common_primitives/horizontal_concat.py
diff --git a/common-primitives/common_primitives/kfold_split.py b/tods/common_primitives/kfold_split.py
similarity index 100%
rename from common-primitives/common_primitives/kfold_split.py
rename to tods/common_primitives/kfold_split.py
diff --git a/common-primitives/common_primitives/kfold_split_timeseries.py b/tods/common_primitives/kfold_split_timeseries.py
similarity index 100%
rename from common-primitives/common_primitives/kfold_split_timeseries.py
rename to tods/common_primitives/kfold_split_timeseries.py
diff --git a/common-primitives/common_primitives/lgbm_classifier.py b/tods/common_primitives/lgbm_classifier.py
similarity index 100%
rename from common-primitives/common_primitives/lgbm_classifier.py
rename to tods/common_primitives/lgbm_classifier.py
diff --git a/common-primitives/common_primitives/list_to_dataframe.py b/tods/common_primitives/list_to_dataframe.py
similarity index 100%
rename from common-primitives/common_primitives/list_to_dataframe.py
rename to tods/common_primitives/list_to_dataframe.py
diff --git a/common-primitives/common_primitives/list_to_ndarray.py b/tods/common_primitives/list_to_ndarray.py
similarity index 100%
rename from common-primitives/common_primitives/list_to_ndarray.py
rename to tods/common_primitives/list_to_ndarray.py
diff --git a/common-primitives/common_primitives/mean_average_transform.py b/tods/common_primitives/mean_average_transform.py
similarity index 100%
rename from common-primitives/common_primitives/mean_average_transform.py
rename to tods/common_primitives/mean_average_transform.py
diff --git a/common-primitives/common_primitives/ndarray_to_dataframe.py b/tods/common_primitives/ndarray_to_dataframe.py
similarity index 100%
rename from common-primitives/common_primitives/ndarray_to_dataframe.py
rename to tods/common_primitives/ndarray_to_dataframe.py
diff --git a/common-primitives/common_primitives/ndarray_to_list.py b/tods/common_primitives/ndarray_to_list.py
similarity index 100%
rename from common-primitives/common_primitives/ndarray_to_list.py
rename to tods/common_primitives/ndarray_to_list.py
diff --git a/common-primitives/common_primitives/no_split.py b/tods/common_primitives/no_split.py
similarity index 100%
rename from common-primitives/common_primitives/no_split.py
rename to tods/common_primitives/no_split.py
diff --git a/common-primitives/common_primitives/normalize_column_references.py b/tods/common_primitives/normalize_column_references.py
similarity index 100%
rename from common-primitives/common_primitives/normalize_column_references.py
rename to tods/common_primitives/normalize_column_references.py
diff --git a/common-primitives/common_primitives/normalize_graphs.py b/tods/common_primitives/normalize_graphs.py
similarity index 100%
rename from common-primitives/common_primitives/normalize_graphs.py
rename to tods/common_primitives/normalize_graphs.py
diff --git a/common-primitives/common_primitives/numeric_range_filter.py b/tods/common_primitives/numeric_range_filter.py
similarity index 100%
rename from common-primitives/common_primitives/numeric_range_filter.py
rename to tods/common_primitives/numeric_range_filter.py
diff --git a/common-primitives/common_primitives/one_hot_maker.py b/tods/common_primitives/one_hot_maker.py
similarity index 100%
rename from common-primitives/common_primitives/one_hot_maker.py
rename to tods/common_primitives/one_hot_maker.py
diff --git a/common-primitives/common_primitives/pandas_onehot_encoder.py b/tods/common_primitives/pandas_onehot_encoder.py
similarity index 100%
rename from common-primitives/common_primitives/pandas_onehot_encoder.py
rename to tods/common_primitives/pandas_onehot_encoder.py
diff --git a/common-primitives/common_primitives/random_forest.py b/tods/common_primitives/random_forest.py
similarity index 100%
rename from common-primitives/common_primitives/random_forest.py
rename to tods/common_primitives/random_forest.py
diff --git a/common-primitives/common_primitives/ravel.py b/tods/common_primitives/ravel.py
similarity index 100%
rename from common-primitives/common_primitives/ravel.py
rename to tods/common_primitives/ravel.py
diff --git a/common-primitives/common_primitives/redact_columns.py b/tods/common_primitives/redact_columns.py
similarity index 100%
rename from common-primitives/common_primitives/redact_columns.py
rename to tods/common_primitives/redact_columns.py
diff --git a/common-primitives/common_primitives/regex_filter.py b/tods/common_primitives/regex_filter.py
similarity index 100%
rename from common-primitives/common_primitives/regex_filter.py
rename to tods/common_primitives/regex_filter.py
diff --git a/common-primitives/common_primitives/remove_columns.py b/tods/common_primitives/remove_columns.py
similarity index 100%
rename from common-primitives/common_primitives/remove_columns.py
rename to tods/common_primitives/remove_columns.py
diff --git a/common-primitives/common_primitives/remove_duplicate_columns.py b/tods/common_primitives/remove_duplicate_columns.py
similarity index 100%
rename from common-primitives/common_primitives/remove_duplicate_columns.py
rename to tods/common_primitives/remove_duplicate_columns.py
diff --git a/common-primitives/common_primitives/remove_semantic_types.py b/tods/common_primitives/remove_semantic_types.py
similarity index 100%
rename from common-primitives/common_primitives/remove_semantic_types.py
rename to tods/common_primitives/remove_semantic_types.py
diff --git a/common-primitives/common_primitives/rename_duplicate_columns.py b/tods/common_primitives/rename_duplicate_columns.py
similarity index 100%
rename from common-primitives/common_primitives/rename_duplicate_columns.py
rename to tods/common_primitives/rename_duplicate_columns.py
diff --git a/common-primitives/common_primitives/replace_semantic_types.py b/tods/common_primitives/replace_semantic_types.py
similarity index 100%
rename from common-primitives/common_primitives/replace_semantic_types.py
rename to tods/common_primitives/replace_semantic_types.py
diff --git a/common-primitives/common_primitives/simple_exponential_smoothing.py b/tods/common_primitives/simple_exponential_smoothing.py
similarity index 100%
rename from common-primitives/common_primitives/simple_exponential_smoothing.py
rename to tods/common_primitives/simple_exponential_smoothing.py
diff --git a/common-primitives/common_primitives/simple_profiler.py b/tods/common_primitives/simple_profiler.py
similarity index 100%
rename from common-primitives/common_primitives/simple_profiler.py
rename to tods/common_primitives/simple_profiler.py
diff --git a/common-primitives/common_primitives/slacker/README.md b/tods/common_primitives/slacker/README.md
similarity index 100%
rename from common-primitives/common_primitives/slacker/README.md
rename to tods/common_primitives/slacker/README.md
diff --git a/common-primitives/common_primitives/slacker/__init__.py b/tods/common_primitives/slacker/__init__.py
similarity index 100%
rename from common-primitives/common_primitives/slacker/__init__.py
rename to tods/common_primitives/slacker/__init__.py
diff --git a/common-primitives/common_primitives/slacker/base.py b/tods/common_primitives/slacker/base.py
similarity index 100%
rename from common-primitives/common_primitives/slacker/base.py
rename to tods/common_primitives/slacker/base.py
diff --git a/common-primitives/common_primitives/slacker/estimation.py b/tods/common_primitives/slacker/estimation.py
similarity index 100%
rename from common-primitives/common_primitives/slacker/estimation.py
rename to tods/common_primitives/slacker/estimation.py
diff --git a/common-primitives/common_primitives/slacker/feature_extraction.py b/tods/common_primitives/slacker/feature_extraction.py
similarity index 100%
rename from common-primitives/common_primitives/slacker/feature_extraction.py
rename to tods/common_primitives/slacker/feature_extraction.py
diff --git a/common-primitives/common_primitives/slacker/feature_selection.py b/tods/common_primitives/slacker/feature_selection.py
similarity index 100%
rename from common-primitives/common_primitives/slacker/feature_selection.py
rename to tods/common_primitives/slacker/feature_selection.py
diff --git a/common-primitives/common_primitives/stack_ndarray_column.py b/tods/common_primitives/stack_ndarray_column.py
similarity index 100%
rename from common-primitives/common_primitives/stack_ndarray_column.py
rename to tods/common_primitives/stack_ndarray_column.py
diff --git a/common-primitives/common_primitives/tabular_extractor.py b/tods/common_primitives/tabular_extractor.py
similarity index 100%
rename from common-primitives/common_primitives/tabular_extractor.py
rename to tods/common_primitives/tabular_extractor.py
diff --git a/common-primitives/common_primitives/term_filter.py b/tods/common_primitives/term_filter.py
similarity index 100%
rename from common-primitives/common_primitives/term_filter.py
rename to tods/common_primitives/term_filter.py
diff --git a/common-primitives/common_primitives/text_reader.py b/tods/common_primitives/text_reader.py
similarity index 100%
rename from common-primitives/common_primitives/text_reader.py
rename to tods/common_primitives/text_reader.py
diff --git a/common-primitives/common_primitives/train_score_split.py b/tods/common_primitives/train_score_split.py
similarity index 100%
rename from common-primitives/common_primitives/train_score_split.py
rename to tods/common_primitives/train_score_split.py
diff --git a/common-primitives/common_primitives/unseen_label_decoder.py b/tods/common_primitives/unseen_label_decoder.py
similarity index 100%
rename from common-primitives/common_primitives/unseen_label_decoder.py
rename to tods/common_primitives/unseen_label_decoder.py
diff --git a/common-primitives/common_primitives/unseen_label_encoder.py b/tods/common_primitives/unseen_label_encoder.py
similarity index 100%
rename from common-primitives/common_primitives/unseen_label_encoder.py
rename to tods/common_primitives/unseen_label_encoder.py
diff --git a/common-primitives/common_primitives/utils.py b/tods/common_primitives/utils.py
similarity index 100%
rename from common-primitives/common_primitives/utils.py
rename to tods/common_primitives/utils.py
diff --git a/common-primitives/common_primitives/video_reader.py b/tods/common_primitives/video_reader.py
similarity index 100%
rename from common-primitives/common_primitives/video_reader.py
rename to tods/common_primitives/video_reader.py
diff --git a/common-primitives/common_primitives/xgboost_dart.py b/tods/common_primitives/xgboost_dart.py
similarity index 100%
rename from common-primitives/common_primitives/xgboost_dart.py
rename to tods/common_primitives/xgboost_dart.py
diff --git a/common-primitives/common_primitives/xgboost_gbtree.py b/tods/common_primitives/xgboost_gbtree.py
similarity index 100%
rename from common-primitives/common_primitives/xgboost_gbtree.py
rename to tods/common_primitives/xgboost_gbtree.py
diff --git a/common-primitives/common_primitives/xgboost_regressor.py b/tods/common_primitives/xgboost_regressor.py
similarity index 100%
rename from common-primitives/common_primitives/xgboost_regressor.py
rename to tods/common_primitives/xgboost_regressor.py
diff --git a/data_processing/CategoricalToBinary.py b/tods/data_processing/CategoricalToBinary.py
similarity index 100%
rename from data_processing/CategoricalToBinary.py
rename to tods/data_processing/CategoricalToBinary.py
diff --git a/data_processing/ColumnFilter.py b/tods/data_processing/ColumnFilter.py
similarity index 100%
rename from data_processing/ColumnFilter.py
rename to tods/data_processing/ColumnFilter.py
diff --git a/data_processing/ContinuityValidation.py b/tods/data_processing/ContinuityValidation.py
similarity index 100%
rename from data_processing/ContinuityValidation.py
rename to tods/data_processing/ContinuityValidation.py
diff --git a/data_processing/DatasetToDataframe.py b/tods/data_processing/DatasetToDataframe.py
similarity index 100%
rename from data_processing/DatasetToDataframe.py
rename to tods/data_processing/DatasetToDataframe.py
diff --git a/data_processing/DuplicationValidation.py b/tods/data_processing/DuplicationValidation.py
similarity index 100%
rename from data_processing/DuplicationValidation.py
rename to tods/data_processing/DuplicationValidation.py
diff --git a/data_processing/TimeIntervalTransform.py b/tods/data_processing/TimeIntervalTransform.py
similarity index 100%
rename from data_processing/TimeIntervalTransform.py
rename to tods/data_processing/TimeIntervalTransform.py
diff --git a/data_processing/TimeStampValidation.py b/tods/data_processing/TimeStampValidation.py
similarity index 100%
rename from data_processing/TimeStampValidation.py
rename to tods/data_processing/TimeStampValidation.py
diff --git a/data_processing/__init__.py b/tods/data_processing/__init__.py
similarity index 100%
rename from data_processing/__init__.py
rename to tods/data_processing/__init__.py
diff --git a/detection_algorithm/AutoRegODetect.py b/tods/detection_algorithm/AutoRegODetect.py
similarity index 100%
rename from detection_algorithm/AutoRegODetect.py
rename to tods/detection_algorithm/AutoRegODetect.py
diff --git a/detection_algorithm/DeepLog.py b/tods/detection_algorithm/DeepLog.py
similarity index 100%
rename from detection_algorithm/DeepLog.py
rename to tods/detection_algorithm/DeepLog.py
diff --git a/detection_algorithm/KDiscordODetect.py b/tods/detection_algorithm/KDiscordODetect.py
similarity index 100%
rename from detection_algorithm/KDiscordODetect.py
rename to tods/detection_algorithm/KDiscordODetect.py
diff --git a/detection_algorithm/LSTMODetect.py b/tods/detection_algorithm/LSTMODetect.py
similarity index 100%
rename from detection_algorithm/LSTMODetect.py
rename to tods/detection_algorithm/LSTMODetect.py
diff --git a/detection_algorithm/MatrixProfile.py b/tods/detection_algorithm/MatrixProfile.py
similarity index 100%
rename from detection_algorithm/MatrixProfile.py
rename to tods/detection_algorithm/MatrixProfile.py
diff --git a/detection_algorithm/PCAODetect.py b/tods/detection_algorithm/PCAODetect.py
similarity index 100%
rename from detection_algorithm/PCAODetect.py
rename to tods/detection_algorithm/PCAODetect.py
diff --git a/detection_algorithm/PyodABOD.py b/tods/detection_algorithm/PyodABOD.py
similarity index 100%
rename from detection_algorithm/PyodABOD.py
rename to tods/detection_algorithm/PyodABOD.py
diff --git a/detection_algorithm/PyodAE.py b/tods/detection_algorithm/PyodAE.py
similarity index 100%
rename from detection_algorithm/PyodAE.py
rename to tods/detection_algorithm/PyodAE.py
diff --git a/detection_algorithm/PyodCBLOF.py b/tods/detection_algorithm/PyodCBLOF.py
similarity index 100%
rename from detection_algorithm/PyodCBLOF.py
rename to tods/detection_algorithm/PyodCBLOF.py
diff --git a/detection_algorithm/PyodCOF.py b/tods/detection_algorithm/PyodCOF.py
similarity index 100%
rename from detection_algorithm/PyodCOF.py
rename to tods/detection_algorithm/PyodCOF.py
diff --git a/detection_algorithm/PyodHBOS.py b/tods/detection_algorithm/PyodHBOS.py
similarity index 100%
rename from detection_algorithm/PyodHBOS.py
rename to tods/detection_algorithm/PyodHBOS.py
diff --git a/detection_algorithm/PyodIsolationForest.py b/tods/detection_algorithm/PyodIsolationForest.py
similarity index 100%
rename from detection_algorithm/PyodIsolationForest.py
rename to tods/detection_algorithm/PyodIsolationForest.py
diff --git a/detection_algorithm/PyodKNN.py b/tods/detection_algorithm/PyodKNN.py
similarity index 100%
rename from detection_algorithm/PyodKNN.py
rename to tods/detection_algorithm/PyodKNN.py
diff --git a/detection_algorithm/PyodLODA.py b/tods/detection_algorithm/PyodLODA.py
similarity index 100%
rename from detection_algorithm/PyodLODA.py
rename to tods/detection_algorithm/PyodLODA.py
diff --git a/detection_algorithm/PyodLOF.py b/tods/detection_algorithm/PyodLOF.py
similarity index 100%
rename from detection_algorithm/PyodLOF.py
rename to tods/detection_algorithm/PyodLOF.py
diff --git a/detection_algorithm/PyodMoGaal.py b/tods/detection_algorithm/PyodMoGaal.py
similarity index 100%
rename from detection_algorithm/PyodMoGaal.py
rename to tods/detection_algorithm/PyodMoGaal.py
diff --git a/detection_algorithm/PyodOCSVM.py b/tods/detection_algorithm/PyodOCSVM.py
similarity index 100%
rename from detection_algorithm/PyodOCSVM.py
rename to tods/detection_algorithm/PyodOCSVM.py
diff --git a/detection_algorithm/PyodSOD.py b/tods/detection_algorithm/PyodSOD.py
similarity index 100%
rename from detection_algorithm/PyodSOD.py
rename to tods/detection_algorithm/PyodSOD.py
diff --git a/detection_algorithm/PyodSoGaal.py b/tods/detection_algorithm/PyodSoGaal.py
similarity index 100%
rename from detection_algorithm/PyodSoGaal.py
rename to tods/detection_algorithm/PyodSoGaal.py
diff --git a/detection_algorithm/PyodVAE.py b/tods/detection_algorithm/PyodVAE.py
similarity index 100%
rename from detection_algorithm/PyodVAE.py
rename to tods/detection_algorithm/PyodVAE.py
diff --git a/detection_algorithm/Telemanom.py b/tods/detection_algorithm/Telemanom.py
similarity index 100%
rename from detection_algorithm/Telemanom.py
rename to tods/detection_algorithm/Telemanom.py
diff --git a/detection_algorithm/UODBasePrimitive.py b/tods/detection_algorithm/UODBasePrimitive.py
similarity index 100%
rename from detection_algorithm/UODBasePrimitive.py
rename to tods/detection_algorithm/UODBasePrimitive.py
diff --git a/detection_algorithm/core/AutoRegOD.py b/tods/detection_algorithm/core/AutoRegOD.py
similarity index 100%
rename from detection_algorithm/core/AutoRegOD.py
rename to tods/detection_algorithm/core/AutoRegOD.py
diff --git a/detection_algorithm/core/CollectiveBase.py b/tods/detection_algorithm/core/CollectiveBase.py
similarity index 100%
rename from detection_algorithm/core/CollectiveBase.py
rename to tods/detection_algorithm/core/CollectiveBase.py
diff --git a/detection_algorithm/core/CollectiveCommonTest.py b/tods/detection_algorithm/core/CollectiveCommonTest.py
similarity index 100%
rename from detection_algorithm/core/CollectiveCommonTest.py
rename to tods/detection_algorithm/core/CollectiveCommonTest.py
diff --git a/detection_algorithm/core/KDiscord.py b/tods/detection_algorithm/core/KDiscord.py
similarity index 100%
rename from detection_algorithm/core/KDiscord.py
rename to tods/detection_algorithm/core/KDiscord.py
diff --git a/detection_algorithm/core/LSTMOD.py b/tods/detection_algorithm/core/LSTMOD.py
similarity index 100%
rename from detection_algorithm/core/LSTMOD.py
rename to tods/detection_algorithm/core/LSTMOD.py
diff --git a/detection_algorithm/core/MultiAutoRegOD.py b/tods/detection_algorithm/core/MultiAutoRegOD.py
similarity index 100%
rename from detection_algorithm/core/MultiAutoRegOD.py
rename to tods/detection_algorithm/core/MultiAutoRegOD.py
diff --git a/detection_algorithm/core/PCA.py b/tods/detection_algorithm/core/PCA.py
similarity index 100%
rename from detection_algorithm/core/PCA.py
rename to tods/detection_algorithm/core/PCA.py
diff --git a/detection_algorithm/core/UODCommonTest.py b/tods/detection_algorithm/core/UODCommonTest.py
similarity index 100%
rename from detection_algorithm/core/UODCommonTest.py
rename to tods/detection_algorithm/core/UODCommonTest.py
diff --git a/detection_algorithm/core/algorithm_implementation.py b/tods/detection_algorithm/core/algorithm_implementation.py
similarity index 100%
rename from detection_algorithm/core/algorithm_implementation.py
rename to tods/detection_algorithm/core/algorithm_implementation.py
diff --git a/detection_algorithm/core/test_CollectiveBase.py b/tods/detection_algorithm/core/test_CollectiveBase.py
similarity index 100%
rename from detection_algorithm/core/test_CollectiveBase.py
rename to tods/detection_algorithm/core/test_CollectiveBase.py
diff --git a/detection_algorithm/core/utility.py b/tods/detection_algorithm/core/utility.py
similarity index 100%
rename from detection_algorithm/core/utility.py
rename to tods/detection_algorithm/core/utility.py
diff --git a/detection_algorithm/core/utils/channel.py b/tods/detection_algorithm/core/utils/channel.py
similarity index 100%
rename from detection_algorithm/core/utils/channel.py
rename to tods/detection_algorithm/core/utils/channel.py
diff --git a/detection_algorithm/core/utils/errors.py b/tods/detection_algorithm/core/utils/errors.py
similarity index 100%
rename from detection_algorithm/core/utils/errors.py
rename to tods/detection_algorithm/core/utils/errors.py
diff --git a/detection_algorithm/core/utils/modeling.py b/tods/detection_algorithm/core/utils/modeling.py
similarity index 100%
rename from detection_algorithm/core/utils/modeling.py
rename to tods/detection_algorithm/core/utils/modeling.py
diff --git a/detection_algorithm/core/utils/utils.py b/tods/detection_algorithm/core/utils/utils.py
similarity index 100%
rename from detection_algorithm/core/utils/utils.py
rename to tods/detection_algorithm/core/utils/utils.py
diff --git a/feature_analysis/AutoCorrelation.py b/tods/feature_analysis/AutoCorrelation.py
similarity index 100%
rename from feature_analysis/AutoCorrelation.py
rename to tods/feature_analysis/AutoCorrelation.py
diff --git a/feature_analysis/BKFilter.py b/tods/feature_analysis/BKFilter.py
similarity index 100%
rename from feature_analysis/BKFilter.py
rename to tods/feature_analysis/BKFilter.py
diff --git a/feature_analysis/DiscreteCosineTransform.py b/tods/feature_analysis/DiscreteCosineTransform.py
similarity index 100%
rename from feature_analysis/DiscreteCosineTransform.py
rename to tods/feature_analysis/DiscreteCosineTransform.py
diff --git a/feature_analysis/FastFourierTransform.py b/tods/feature_analysis/FastFourierTransform.py
similarity index 100%
rename from feature_analysis/FastFourierTransform.py
rename to tods/feature_analysis/FastFourierTransform.py
diff --git a/feature_analysis/HPFilter.py b/tods/feature_analysis/HPFilter.py
similarity index 100%
rename from feature_analysis/HPFilter.py
rename to tods/feature_analysis/HPFilter.py
diff --git a/feature_analysis/NonNegativeMatrixFactorization.py b/tods/feature_analysis/NonNegativeMatrixFactorization.py
similarity index 100%
rename from feature_analysis/NonNegativeMatrixFactorization.py
rename to tods/feature_analysis/NonNegativeMatrixFactorization.py
diff --git a/feature_analysis/SKTruncatedSVD.py b/tods/feature_analysis/SKTruncatedSVD.py
similarity index 100%
rename from feature_analysis/SKTruncatedSVD.py
rename to tods/feature_analysis/SKTruncatedSVD.py
diff --git a/feature_analysis/SpectralResidualTransform.py b/tods/feature_analysis/SpectralResidualTransform.py
similarity index 100%
rename from feature_analysis/SpectralResidualTransform.py
rename to tods/feature_analysis/SpectralResidualTransform.py
diff --git a/feature_analysis/StatisticalAbsEnergy.py b/tods/feature_analysis/StatisticalAbsEnergy.py
similarity index 100%
rename from feature_analysis/StatisticalAbsEnergy.py
rename to tods/feature_analysis/StatisticalAbsEnergy.py
diff --git a/feature_analysis/StatisticalAbsSum.py b/tods/feature_analysis/StatisticalAbsSum.py
similarity index 100%
rename from feature_analysis/StatisticalAbsSum.py
rename to tods/feature_analysis/StatisticalAbsSum.py
diff --git a/feature_analysis/StatisticalGmean.py b/tods/feature_analysis/StatisticalGmean.py
similarity index 100%
rename from feature_analysis/StatisticalGmean.py
rename to tods/feature_analysis/StatisticalGmean.py
diff --git a/feature_analysis/StatisticalHmean.py b/tods/feature_analysis/StatisticalHmean.py
similarity index 100%
rename from feature_analysis/StatisticalHmean.py
rename to tods/feature_analysis/StatisticalHmean.py
diff --git a/feature_analysis/StatisticalKurtosis.py b/tods/feature_analysis/StatisticalKurtosis.py
similarity index 100%
rename from feature_analysis/StatisticalKurtosis.py
rename to tods/feature_analysis/StatisticalKurtosis.py
diff --git a/feature_analysis/StatisticalMaximum.py b/tods/feature_analysis/StatisticalMaximum.py
similarity index 100%
rename from feature_analysis/StatisticalMaximum.py
rename to tods/feature_analysis/StatisticalMaximum.py
diff --git a/feature_analysis/StatisticalMean.py b/tods/feature_analysis/StatisticalMean.py
similarity index 100%
rename from feature_analysis/StatisticalMean.py
rename to tods/feature_analysis/StatisticalMean.py
diff --git a/feature_analysis/StatisticalMeanAbs.py b/tods/feature_analysis/StatisticalMeanAbs.py
similarity index 100%
rename from feature_analysis/StatisticalMeanAbs.py
rename to tods/feature_analysis/StatisticalMeanAbs.py
diff --git a/feature_analysis/StatisticalMeanAbsTemporalDerivative.py b/tods/feature_analysis/StatisticalMeanAbsTemporalDerivative.py
similarity index 100%
rename from feature_analysis/StatisticalMeanAbsTemporalDerivative.py
rename to tods/feature_analysis/StatisticalMeanAbsTemporalDerivative.py
diff --git a/feature_analysis/StatisticalMeanTemporalDerivative.py b/tods/feature_analysis/StatisticalMeanTemporalDerivative.py
similarity index 100%
rename from feature_analysis/StatisticalMeanTemporalDerivative.py
rename to tods/feature_analysis/StatisticalMeanTemporalDerivative.py
diff --git a/feature_analysis/StatisticalMedian.py b/tods/feature_analysis/StatisticalMedian.py
similarity index 100%
rename from feature_analysis/StatisticalMedian.py
rename to tods/feature_analysis/StatisticalMedian.py
diff --git a/feature_analysis/StatisticalMedianAbsoluteDeviation.py b/tods/feature_analysis/StatisticalMedianAbsoluteDeviation.py
similarity index 100%
rename from feature_analysis/StatisticalMedianAbsoluteDeviation.py
rename to tods/feature_analysis/StatisticalMedianAbsoluteDeviation.py
diff --git a/feature_analysis/StatisticalMinimum.py b/tods/feature_analysis/StatisticalMinimum.py
similarity index 100%
rename from feature_analysis/StatisticalMinimum.py
rename to tods/feature_analysis/StatisticalMinimum.py
diff --git a/feature_analysis/StatisticalSkew.py b/tods/feature_analysis/StatisticalSkew.py
similarity index 100%
rename from feature_analysis/StatisticalSkew.py
rename to tods/feature_analysis/StatisticalSkew.py
diff --git a/feature_analysis/StatisticalStd.py b/tods/feature_analysis/StatisticalStd.py
similarity index 100%
rename from feature_analysis/StatisticalStd.py
rename to tods/feature_analysis/StatisticalStd.py
diff --git a/feature_analysis/StatisticalVar.py b/tods/feature_analysis/StatisticalVar.py
similarity index 100%
rename from feature_analysis/StatisticalVar.py
rename to tods/feature_analysis/StatisticalVar.py
diff --git a/feature_analysis/StatisticalVariation.py b/tods/feature_analysis/StatisticalVariation.py
similarity index 100%
rename from feature_analysis/StatisticalVariation.py
rename to tods/feature_analysis/StatisticalVariation.py
diff --git a/feature_analysis/StatisticalVecSum.py b/tods/feature_analysis/StatisticalVecSum.py
similarity index 100%
rename from feature_analysis/StatisticalVecSum.py
rename to tods/feature_analysis/StatisticalVecSum.py
diff --git a/feature_analysis/StatisticalWillisonAmplitude.py b/tods/feature_analysis/StatisticalWillisonAmplitude.py
similarity index 100%
rename from feature_analysis/StatisticalWillisonAmplitude.py
rename to tods/feature_analysis/StatisticalWillisonAmplitude.py
diff --git a/feature_analysis/StatisticalZeroCrossing.py b/tods/feature_analysis/StatisticalZeroCrossing.py
similarity index 100%
rename from feature_analysis/StatisticalZeroCrossing.py
rename to tods/feature_analysis/StatisticalZeroCrossing.py
diff --git a/feature_analysis/TRMF.py b/tods/feature_analysis/TRMF.py
similarity index 100%
rename from feature_analysis/TRMF.py
rename to tods/feature_analysis/TRMF.py
diff --git a/feature_analysis/WaveletTransform.py b/tods/feature_analysis/WaveletTransform.py
similarity index 100%
rename from feature_analysis/WaveletTransform.py
rename to tods/feature_analysis/WaveletTransform.py
diff --git a/feature_analysis/__init__.py b/tods/feature_analysis/__init__.py
similarity index 100%
rename from feature_analysis/__init__.py
rename to tods/feature_analysis/__init__.py
diff --git a/reinforcement/RuleBasedFilter.py b/tods/reinforcement/RuleBasedFilter.py
similarity index 100%
rename from reinforcement/RuleBasedFilter.py
rename to tods/reinforcement/RuleBasedFilter.py
diff --git a/timeseries_processing/.HoltSmoothing.py.swo b/tods/timeseries_processing/.HoltSmoothing.py.swo
similarity index 100%
rename from timeseries_processing/.HoltSmoothing.py.swo
rename to tods/timeseries_processing/.HoltSmoothing.py.swo
diff --git a/timeseries_processing/HoltSmoothing.py b/tods/timeseries_processing/HoltSmoothing.py
similarity index 100%
rename from timeseries_processing/HoltSmoothing.py
rename to tods/timeseries_processing/HoltSmoothing.py
diff --git a/timeseries_processing/HoltWintersExponentialSmoothing.py b/tods/timeseries_processing/HoltWintersExponentialSmoothing.py
similarity index 100%
rename from timeseries_processing/HoltWintersExponentialSmoothing.py
rename to tods/timeseries_processing/HoltWintersExponentialSmoothing.py
diff --git a/timeseries_processing/MovingAverageTransform.py b/tods/timeseries_processing/MovingAverageTransform.py
similarity index 100%
rename from timeseries_processing/MovingAverageTransform.py
rename to tods/timeseries_processing/MovingAverageTransform.py
diff --git a/timeseries_processing/SKAxiswiseScaler.py b/tods/timeseries_processing/SKAxiswiseScaler.py
similarity index 100%
rename from timeseries_processing/SKAxiswiseScaler.py
rename to tods/timeseries_processing/SKAxiswiseScaler.py
diff --git a/timeseries_processing/SKPowerTransformer.py b/tods/timeseries_processing/SKPowerTransformer.py
similarity index 100%
rename from timeseries_processing/SKPowerTransformer.py
rename to tods/timeseries_processing/SKPowerTransformer.py
diff --git a/timeseries_processing/SKQuantileTransformer.py b/tods/timeseries_processing/SKQuantileTransformer.py
similarity index 100%
rename from timeseries_processing/SKQuantileTransformer.py
rename to tods/timeseries_processing/SKQuantileTransformer.py
diff --git a/timeseries_processing/SKStandardScaler.py b/tods/timeseries_processing/SKStandardScaler.py
similarity index 100%
rename from timeseries_processing/SKStandardScaler.py
rename to tods/timeseries_processing/SKStandardScaler.py
diff --git a/timeseries_processing/SimpleExponentialSmoothing.py b/tods/timeseries_processing/SimpleExponentialSmoothing.py
similarity index 100%
rename from timeseries_processing/SimpleExponentialSmoothing.py
rename to tods/timeseries_processing/SimpleExponentialSmoothing.py
diff --git a/timeseries_processing/TimeSeriesSeasonalityTrendDecomposition.py b/tods/timeseries_processing/TimeSeriesSeasonalityTrendDecomposition.py
similarity index 100%
rename from timeseries_processing/TimeSeriesSeasonalityTrendDecomposition.py
rename to tods/timeseries_processing/TimeSeriesSeasonalityTrendDecomposition.py
diff --git a/timeseries_processing/__init__.py b/tods/timeseries_processing/__init__.py
similarity index 100%
rename from timeseries_processing/__init__.py
rename to tods/timeseries_processing/__init__.py